# Source code for skosprovider_heritagedata.providers

# -*- coding: utf-8 -*-
'''
This module implements a :class:`skosprovider.providers.VocabularyProvider`
for http://www.heritagedata.org.
'''

import requests
from requests.exceptions import ConnectionError

import warnings
import logging
log = logging.getLogger(__name__)

from language_tags import tags
from rdflib.namespace import SKOS

from skosprovider.exceptions import ProviderUnavailableException
from skosprovider.providers import VocabularyProvider

from skosprovider_heritagedata.utils import (
    _split_uri, 
    uri_to_graph,
    conceptscheme_from_uri,
    things_from_graph
)

class HeritagedataProvider(VocabularyProvider):
    """A provider that can work with the Heritagedata services of
    http://www.heritagedata.org/blog/services/

    Concepts are read from the RDF documents published under the scheme uri;
    searches and hierarchy lookups go through the JSON web services found
    under ``service_scheme_uri``.
    """

    def __init__(self, metadata, **kwargs):
        """
        Constructor of the
        :class:`skosprovider_heritagedata.providers.HeritagedataProvider`

        :param (dict) metadata: metadata of the provider. When it has no
            `default_language` key, the key is added (to the dict that was
            passed in) with the value `en`.
        :param kwargs: arguments defining the provider.

            * `scheme_uri`: uri of the concept scheme. It is split into a
              `base_scheme_uri` and a `scheme_id`. When absent,
              `http://purl.org/heritagedata/schemes/eh_period` is used.
            * `service_scheme_uri`: base uri of the web services. When
              absent, `http://heritagedata.org/live/services` is used.
        """
        if 'default_language' not in metadata:
            metadata['default_language'] = 'en'
        if 'scheme_uri' in kwargs:
            # NOTE(review): _split_uri is defined in utils; presumably index 0
            # is the base uri and index 1 the last path segment -- confirm.
            self.base_scheme_uri = _split_uri(kwargs['scheme_uri'], 0)
            self.scheme_id = _split_uri(kwargs['scheme_uri'], 1)
        else:
            self.base_scheme_uri = 'http://purl.org/heritagedata/schemes'
            self.scheme_id = 'eh_period'
        self.scheme_uri = self.base_scheme_uri + "/" + self.scheme_id

        if 'service_scheme_uri' in kwargs:
            self.service_scheme_uri = kwargs['service_scheme_uri'].strip('/')
        else:
            self.service_scheme_uri = "http://heritagedata.org/live/services"

        concept_scheme = conceptscheme_from_uri(self.scheme_uri)
        super(HeritagedataProvider, self).__init__(
            metadata, concept_scheme=concept_scheme, **kwargs
        )

    def _get_language(self, **kwargs):
        """
        Return the `language` keyword argument if one was passed, otherwise
        the `default_language` of the provider metadata.
        """
        if 'language' in kwargs:
            return kwargs['language']
        return self.metadata['default_language']

    def get_by_id(self, id):
        """
        Get a :class:`skosprovider.skos.Concept` or
        :class:`skosprovider.skos.Collection` by id

        :param (str) id: id of the :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`. Integer ids are accepted
            as well.
        :return: corresponding :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
            Returns False if non-existing id
        """
        graph = uri_to_graph(
            '%s/%s/%s.rdf' % (self.scheme_uri, "concepts", id)
        )
        if graph is False:
            return False
        # get the concept
        things = things_from_graph(graph, self.concept_scheme)
        if not things:
            return False
        return things[0]

    def get_by_uri(self, uri):
        """
        Get a :class:`skosprovider.skos.Concept` or
        :class:`skosprovider.skos.Collection` by uri

        The id is taken from the uri and the lookup is delegated to
        :meth:`get_by_id`.

        :param (str) uri: string uri of the
            :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`
        :return: corresponding :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
            Returns False if non-existing id
        """
        id = _split_uri(uri, 1)
        return self.get_by_id(id)

    def find(self, query, **kwargs):
        '''Find concepts that match a certain query.

        Currently query is expected to be a dict, so that complex queries can
        be passed. You can use this dict to search for concepts or collections
        with a certain label, with a certain type and for concepts that belong
        to a certain collection.

        .. warning::

            The underlying service returns labels without specifying if they
            are prefLabels or altLabels. For a certain concept several labels
            are returned. This method does not return labels, but returns
            concepts. When multiple labels are detected for a single concept,
            only one label is attached to this concept. Since no information
            is present about the type of this label, this can be an altLabel
            for a concept where a prefLabel exists.

        .. warning::

            Heritagedata does not use SKOS:Collection. Asking for
            `type: collection` emits a warning and returns an empty list;
            passing a `collection` key emits a warning and raises a
            :class:`ValueError`.

        .. code-block:: python

            # Find anything that has a label of church.
            provider.find({'label': 'church'})

            # Find all concepts that are a part of collection 5.
            provider.find({'type': 'concept', 'collection': {'id': 5}})

            # Find all concepts, collections or children of these
            # that belong to collection 5.
            provider.find({'collection': {'id': 5, 'depth': 'all'}})

        :param query: A dict that can be used to express a query. The
            following keys are permitted:

            * `label`: Search for something with this label value. An empty
              label is equal to searching for all concepts.
            * `type`: Limit the search to certain SKOS elements. If not
              present `all` is assumed:

                * `concept`: Only return :class:`skosprovider.skos.Concept`
                  instances.
                * `collection`: Only return
                  :class:`skosprovider.skos.Collection` instances.
                * `all`: Return both :class:`skosprovider.skos.Concept` and
                  :class:`skosprovider.skos.Collection` instances.
            * `collection`: Search only for concepts belonging to a certain
              collection. This argument should be a dict with two keys:

                * `id`: The id of a collection. Required.
                * `depth`: Can be `members` or `all`. Optional. If not
                  present, `members` is assumed, meaning only concepts or
                  collections that are a direct member of the collection
                  should be considered. When set to `all`, this method
                  should return concepts and collections that are a member
                  of the collection or are a narrower concept of a member
                  of the collection.

        :returns: A :class:`lst` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is
              determined by looking at the `**kwargs` parameter, the default
              language of the provider and finally falls back to `en`.
            * lang: language of the label as reported by the service
        :raises ValueError: if `type` has an unknown value or if a
            `collection` key is present.
        '''
        # interpret and validate query parameters (label, type and collection)
        # Label: None when absent
        label = query.get('label')

        # Type: 'collection', 'concept' or 'all'
        type_c = query.get('type', 'all')
        if type_c == 'collection':
            warnings.warn("This provider doesn't support collections at the moment of implementation because Heritagedata doesn't use SKOS:Collection.", UserWarning)
            return []
        if type_c not in ('all', 'concept', 'collection'):
            raise ValueError("type: only the following values are allowed: 'all', 'concept', 'collection'")

        # Collection
        if 'collection' in query:
            warnings.warn("This provider doesn't support collections at the moment of implementation because Heritagedata doesn't use SKOS:Collection.", UserWarning)
            raise ValueError('You are searching for items in an unexisting collection.')

        params = {'schemeURI': self.scheme_uri, 'contains': label}
        return self._get_items("getConceptLabelMatch", params, **kwargs)

    def get_all(self, **kwargs):
        """
        Not supported: This provider does not support this. The amount of
        results is too large

        :return: always False, after emitting a :class:`UserWarning`.
        """
        warnings.warn(
            'This provider does not support this. The amount of results is too large',
            UserWarning
        )
        return False

    def get_top_concepts(self, **kwargs):
        """
        Returns all concepts that form the top-level of a display hierarchy.

        :return: A :class:`lst` of concepts.
        """
        # Collections are not used in Heritagedata so get_top_concepts()
        # equals get_top_display()
        return self.get_top_display(**kwargs)

    def get_top_display(self, **kwargs):
        """
        Returns all concepts or collections that form the top-level of a
        display hierarchy.

        :return: A :class:`lst` of concepts and collections.
        """
        params = {'schemeURI': self.scheme_uri}
        return self._get_items("getTopConceptsForScheme", params, **kwargs)

    def get_children_display(self, id, **kwargs):
        """
        Return a list of concepts or collections that should be displayed
        under this concept or collection.

        :param str id: A concept or collection id.
        :returns: A :class:`lst` of concepts and collections.
        """
        # %-formatting (instead of "+") so that integer ids work here the
        # same way they do in get_by_id.
        params = {'conceptURI': '%s/concepts/%s' % (self.scheme_uri, id)}
        return self._get_items("getConceptRelations", params, **kwargs)

    def expand(self, id):
        """
        Expand a concept or collection to all its narrower concepts.
        If the id passed belongs to a :class:`skosprovider.skos.Concept`,
        the id of the concept itself should be included in the return value.

        :param str id: A concept or collection id.
        :returns: A :class:`lst` of ids. Returns False if the input id does
            not exist
        """
        expanded = [id]
        expanded.extend(self._get_children(id, all=True))
        # No children found: the id is either a leaf or unknown. Only in
        # that case is the (extra) existence lookup needed.
        if len(expanded) == 1 and self.get_by_id(id) is False:
            return False
        return expanded

    def _request_json(self, service, params):
        """
        Call a Heritagedata service method and return the decoded JSON body.

        :param (str) service: name of the service method, appended to
            `service_scheme_uri`
        :param (dict) params: query string parameters of the request
        :returns: the JSON document returned by the service
        :raises skosprovider.exceptions.ProviderUnavailableException: if the
            connection fails or the service answers with status code 404.
        """
        request = self.service_scheme_uri + "/" + service
        try:
            res = requests.get(request, params=params)
        except ConnectionError:
            raise ProviderUnavailableException("Request could not be executed - Request: %s - Params: %s" % (request, params))
        if res.status_code == 404:
            raise ProviderUnavailableException("Service not found (status_code 404) - Request: %s - Params: %s" % (request, params))
        res.encoding = 'utf-8'
        return res.json()

    def _get_children(self, id, all=False):
        """
        Return the ids of the narrower concepts of a concept.

        :param str id: A concept id.
        :param bool all: If True this method works recursively and also
            returns the narrower concepts of every child. Every level of the
            hierarchy costs one service request per concept.
        :returns: A :class:`lst` of ids, children first followed directly by
            their own descendants.
        :raises skosprovider.exceptions.ProviderUnavailableException: if the
            service can not be reached.
        """
        params = {'conceptURI': '%s/concepts/%s' % (self.scheme_uri, id)}
        result = self._request_json("getConceptRelations", params)
        narrower = str(SKOS.narrower)
        answer = []
        for r in result:
            if r['property'] != narrower:
                continue
            child_id = _split_uri(r["uri"], 1)
            answer.append(child_id)
            if all is True:
                answer.extend(self._get_children(child_id, all=True))
        return answer

    def _get_items(self, service, params, **kwargs):
        """
        Returns the results of a service method to a :class:`lst` of concepts
        (and collections). The return :class:`lst` can be empty.

        The service returns one row per label. Rows are grouped per uri and
        a single label is kept, preferring (in this order) a label whose
        language tag equals the requested one, a label with the same primary
        language and finally an `en` label.

        :param (str) service: service method
        :param (dict) params: query string parameters of the request
        :param kwargs: may contain `language`; otherwise the default language
            of the provider is used.
        :returns: A :class:`lst` of concepts (and collections). Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection.
            * lang: language of the label
        :raises skosprovider.exceptions.ProviderUnavailableException: if the
            service can not be reached.
        """
        result = self._request_json(service, params)

        # Loop invariants: parse the requested language only once.
        narrower = str(SKOS.narrower)
        target = tags.tag(self._get_language(**kwargs))

        d = {}
        for r in result:
            uri = r['uri']
            # getConceptRelations returns every relation of the concept;
            # only the narrower ones are wanted. Other services are not
            # filtered.
            if service == 'getConceptRelations' and r.get('property') != narrower:
                continue
            item = {
                'id': _split_uri(uri, 1),
                'uri': uri,
                'type': 'concept',
                'label': r.get('label'),
                'lang': r.get('label lang')
            }
            if uri not in d:
                d[uri] = item
            # NOTE(review): rows without 'label lang' pass None to tags.tag
            # -- confirm language_tags accepts that.
            if tags.tag(d[uri]['lang']).format == target.format:
                # The label already kept is an exact match; never replace it.
                continue
            candidate = tags.tag(item['lang'])
            if candidate.format == target.format:
                d[uri] = item
            elif (candidate.language and target.language
                    and candidate.language.format == target.language.format):
                # Same primary language, e.g. en-GB for a request of en-US.
                # target.language is checked as well so that a requested tag
                # without a language subtag no longer raises AttributeError.
                d[uri] = item
            elif candidate.format == 'en':
                d[uri] = item
        return list(d.values())