Source code for skosprovider_heritagedata.utils

# -*- coding: utf-8 -*-
'''
Utility functions for :mod:`skosprovider_heritagedata`.
'''

import requests
from skosprovider.skos import (
    Concept,
    Label,
    Note,
    ConceptScheme)
from skosprovider.exceptions import ProviderUnavailableException

import logging
import sys

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# True when the running interpreter's major version is 3.
PY3 = (sys.version_info[0] == 3)

# Type used for raw byte strings on the running interpreter:
# ``bytes`` on Python 3, ``str`` otherwise.
binary_type = bytes if PY3 else str  # pragma: no cover

import rdflib
from rdflib.term import URIRef
from rdflib.namespace import RDF, SKOS, DC, RDFS

# rdflib namespace rooted at http://www.w3.org/ns/prov#.
PROV = rdflib.Namespace('http://www.w3.org/ns/prov#')

def conceptscheme_from_uri(conceptscheme_uri):
    '''
    Build a SKOS Conceptscheme from the RDF found at a :term:`URI`.

    The RDF document is requested from ``<conceptscheme_uri>.rdf``. Every
    ``rdfs:label`` of the conceptscheme found in that document is added as
    an English ``prefLabel``. When no usable graph could be retrieved, the
    conceptscheme is returned without labels. Notes are always empty.

    :param string conceptscheme_uri: URI of the conceptscheme.
    :rtype: skosprovider.skos.ConceptScheme
    '''
    rdf_graph = uri_to_graph('%s.rdf' % (conceptscheme_uri))

    scheme = ConceptScheme(conceptscheme_uri)
    scheme.notes = []
    scheme.labels = []

    # uri_to_graph signals "nothing to read" by returning False.
    if rdf_graph is False:
        return scheme

    label_triples = rdf_graph.triples(
        (URIRef(conceptscheme_uri), RDFS.label, None)
    )
    scheme.labels.extend(
        Label(value.toPython(), "prefLabel", 'en')
        for _, _, value in label_triples
    )
    return scheme
def things_from_graph(graph, concept_scheme):
    '''
    Turn every SKOS concept found in a graph into a
    :class:`skosprovider.skos.Concept`.

    Each concept gets the last path segment of its URI as id, the ids of
    its broader, narrower and related concepts, its labels and notes, an
    empty list of subordinate arrays and the given conceptscheme.

    :param rdflib.Graph graph: Graph to read from.
    :param skosprovider.skos.ConceptScheme concept_scheme: Conceptscheme
        the concepts and collections belong to.
    :rtype: :class:`list`
    '''
    def linked_ids(subject, predicate):
        # Values attached to ``subject`` through a single predicate.
        return _create_from_subject_predicate(graph, subject, predicate)

    def typed_items(subject, valid_types):
        # Labels or notes of ``subject`` for every type in ``valid_types``.
        return _create_from_subject_typelist(graph, subject, valid_types)

    things = []
    for subject, _, _ in graph.triples((None, RDF.type, SKOS.Concept)):
        concept_uri = str(subject)
        concept = Concept(_split_uri(concept_uri, 1), uri=concept_uri)
        concept.broader = linked_ids(subject, SKOS.broader)
        concept.narrower = linked_ids(subject, SKOS.narrower)
        concept.related = linked_ids(subject, SKOS.related)
        concept.labels = typed_items(subject, Label.valid_types)
        concept.notes = typed_items(subject, Note.valid_types)
        concept.subordinate_arrays = []
        concept.concept_scheme = concept_scheme
        things.append(concept)

    # at this moment, Heritagedata does not support SKOS.Collection
    # for sub, pred, obj in graph.triples((None, RDF.type, SKOS.Collection)):
    #     uri = str(sub)
    #     col = Collection(_split_uri(uri, 1), uri=uri)
    #     col.members = _create_from_subject_predicate(sub, SKOS.member)
    #     col.labels = _create_from_subject_typelist(sub, Label.valid_types)
    #     col.notes = _create_from_subject_typelist(sub, Note.valid_types)
    #     clist.append(col)

    return things
def _create_from_subject_typelist(graph, subject, typelist): list = [] for p in typelist: term = SKOS.term(p) list.extend(_create_from_subject_predicate(graph, subject, term)) return list def _create_from_subject_predicate(graph, subject, predicate): list = [] for s, p, o in graph.triples((subject, predicate, None)): type = predicate.split('#')[-1] if Label.is_valid_type(type): o = _create_label(o, type) elif Note.is_valid_type(type): o = _create_note(o, type) else: o = _split_uri(o, 1) if o: list.append(o) return list def _create_label(literal, type): language = literal.language if language is None: return 'und' # return undefined code when no language return Label(literal.toPython(), type, language) def _create_note(literal, type): if not Note.is_valid_type(type): raise ValueError('Type of Note is not valid.') return Note(text_(literal.value, encoding="utf-8"), type, _get_language_from_literal(literal)) def _get_language_from_literal(data): if data.language is None: return 'und' # return undefined code when no language return text_(data.language, encoding="utf-8") def _split_uri(uri, index): return uri.strip('/').rsplit('/', 1)[index]
def uri_to_graph(uri, timeout=30):
    '''
    Fetch the RDF/XML document at a URI and parse it into a graph.

    :param string uri: :term:`URI` where the RDF data can be found.
    :param timeout: Timeout in seconds handed to ``requests.get``; pass
        ``None`` to wait indefinitely.
    :rtype: rdflib.Graph
    :returns: The parsed graph, or ``False`` when the server answers with
        a 404 or the parsed graph contains no triples.
    :raises skosprovider.exceptions.ProviderUnavailableException: if the
        heritagedata.org services are down
    '''
    try:
        # Without a timeout a hanging service would block the caller
        # forever instead of being reported as unavailable.
        res = requests.get(uri, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout):
        raise ProviderUnavailableException("URI not available: %s" % uri)
    if res.status_code == 404:
        return False
    graph = rdflib.Graph()
    # Name the format explicitly: the format rdflib assumes for raw data
    # differs between rdflib versions.
    graph.parse(data=res.content, format='xml')
    # heritagedata.org returns an empty page/graph when a resource does not
    # exist (status_code 200). For this reason we return False if the graph
    # is empty.
    if len(graph) == 0:
        return False
    return graph
def text_(s, encoding='latin-1', errors='strict'):
    '''
    Decode byte strings and pass every other value through untouched.

    :param s: Value to convert.
    :param string encoding: Codec used to decode ``s``.
    :param string errors: Error handling scheme handed to ``decode``.
    :returns: ``s.decode(encoding, errors)`` when ``s`` is an instance of
        ``binary_type``, otherwise ``s`` itself.
    '''
    if not isinstance(s, binary_type):
        return s
    return s.decode(encoding, errors)