Skip to content

Commit

Permalink
Merge branch '138-vocabulary-terms' into 'dev.publicamundi.eu'
Browse files Browse the repository at this point in the history
  • Loading branch information
drmalex07 committed Jun 7, 2015
2 parents c84a49b + d533046 commit 66db577
Show file tree
Hide file tree
Showing 13 changed files with 164 additions and 76 deletions.
8 changes: 4 additions & 4 deletions ckanext/publicamundi/controllers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,15 @@ def vocabulary_get(self, name):
name = str(name)
r = None

vocab = vocabularies.get_by_name(name)
vocab = vocabularies.get_by_name(name)
if vocab:
terms = vocab['vocabulary'].by_value
r = {
'date_type': vocab.get('date_type'),
'reference_date': vocab.get('reference_date'),
'title': vocab.get('title'),
'name': vocab.get('name'),
'terms': [{ 'value': k, 'title': terms[k].title } for k in terms],
'terms': [{'token': t.token, 'value': t.value, 'title': t.title}
for t in vocab['vocabulary']],
}

response.headers['Content-Type'] = content_types['json']
Expand All @@ -149,7 +149,7 @@ def dataset_export(self, name_or_id):
return

def dataset_import(self):

post = request.params

# Forward to the dataset_import action
Expand Down
4 changes: 2 additions & 2 deletions ckanext/publicamundi/lib/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ def flatten_field(field):
'Only zope.schema.Choice supported for key_type'
res = {}
res1 = flatten_field(field.value_type)
for v in field.key_type.vocabulary:
for t in field.key_type.vocabulary:
for k1, field1 in res1.items():
res[(v.token,) + k1] = field1
res[(t.value,) + k1] = field1
else:
res = { (): field }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def check_keywords(obj):
raise zope.interface.Invalid(
'You need to select at least one keyword from INSPIRE data themes')

# Optional list of free-form keywords: at most 10 entries, each of which
# must be an object providing IFreeKeyword.
free_keywords = zope.schema.List(
title= u'Free Keywords',
description = u"The keyword value is a commonly used word, formalised word or phrase used to describe the subject. While the topic category is too coarse for detailed queries, keywords help narrowing a full text search and they allow for structured keyword search.",
required = False,
max_length = 10,
value_type = zope.schema.Object(IFreeKeyword,
title = u'Free Keyword'))
# NOTE(review): 'descend-if-dictized' presumably tells the markup formatter
# not to descend into this field's items when the object is dictized -- confirm
# against the 'format:markup' formatter implementation.
free_keywords.setTaggedValue('format:markup', { 'descend-if-dictized': False })

# Geographic

bounding_box = zope.schema.List(
Expand Down
11 changes: 11 additions & 0 deletions ckanext/publicamundi/lib/metadata/types/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ class FreeKeyword(Object):
reference_date = None
date_type = None

@classmethod
def normalize_keyword(cls, s):
    '''Return s coerced to unicode, then underscored and finally dasherized.

    The two transformations are the `underscore` and `dasherize` functions
    of the `inflection` package, applied in that order.
    '''
    from inflection import dasherize, underscore
    text = unicode(s)
    underscored = underscore(text)
    return dasherize(underscored)

def __init__(self, **kwargs):
    '''Build a FreeKeyword, normalizing the 'value' keyword argument.

    A missing or falsy 'value' is forwarded to the base initializer
    untouched; any other value is first passed through normalize_keyword.
    '''
    if kwargs.get('value'):
        kwargs['value'] = self.normalize_keyword(kwargs['value'])
    super(FreeKeyword, self).__init__(**kwargs)

@object_null_adapter()
class GeographicBoundingBox(Object):

Expand Down
2 changes: 1 addition & 1 deletion ckanext/publicamundi/lib/metadata/types/baz.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ckanext.publicamundi.lib.metadata.types import Thesaurus, ThesaurusTerms
from ckanext.publicamundi.lib.metadata.types._common import *

thesaurus_gemet_themes = Thesaurus.make('keywords-gemet-themes')
thesaurus_gemet_themes = Thesaurus.lookup('keywords-gemet-themes')

class KeywordsFactory(object):

Expand Down
86 changes: 52 additions & 34 deletions ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import re
import uuid
import datetime
import zope.interface
import zope.schema
from zope.schema.vocabulary import SimpleVocabulary
Expand All @@ -17,6 +19,8 @@
from ckanext.publicamundi.lib.metadata.types.thesaurus import Thesaurus, ThesaurusTerms
from ckanext.publicamundi.lib.metadata.types._common import *

strptime = datetime.datetime.strptime

class KeywordsFactory(object):

__slots__ = ('_name',)
Expand All @@ -27,7 +31,7 @@ def __init__(self, thesaurus_name='keywords-gemet-inspire-themes'):
def __call__(self):
keywords = {}
keywords[self._name] = ThesaurusTerms(
terms=[], thesaurus=Thesaurus.make(self._name))
terms=[], thesaurus=Thesaurus.lookup(self._name))
return keywords

class TemporalExtentFactory(object):
Expand Down Expand Up @@ -63,6 +67,7 @@ class InspireMetadata(BaseMetadata):
topic_category = list

keywords = KeywordsFactory()
free_keywords = list

bounding_box = list

Expand Down Expand Up @@ -143,17 +148,13 @@ def to_xml(self, o=None, nsmap=None):
return e

def from_xml(self, e):
'''Build and return an InspireMetadata object serialized as an etree
Element e.
'''Build and return an InspireMetadata object from a (serialized) etree Element e.
'''

def to_date(string):
if isinstance(string, str):
return datetime.datetime.strptime(string,'%Y-%m-%d').date()
else:
return None
def to_date(s):
return strptime(s, '%Y-%m-%d').date() if isinstance(s, str) else None

def to_resp_party(alist):
def to_responsible_party(alist):
result = []
for it in alist:
result.append(ResponsibleParty(
Expand All @@ -162,6 +163,8 @@ def to_resp_party(alist):
role = it.role))
return result

# Parse object

md = MD_Metadata(e)

datestamp = to_date(md.datestamp)
Expand All @@ -176,25 +179,46 @@ def to_resp_party(alist):
for topic in md.identification.topiccategory:
topic_list.append(topic)

keywords_dict = {}
free_keywords = []
keywords = {}
for it in md.identification.keywords:
thes_title = it['thesaurus']['title']
if thes_title is not None:
thes_split = thes_title.split(',')
# TODO thes_split[1] (=version) can be used in a get_by_title_and_version()
# to enforce a specific thesaurus version.
thes_title = thes_split[0]
# Lookup and instantiate a named thesaurus
thes = None
if thes_title:
try:
thes_name = vocabularies.munge('Keywords-' + thes_title)
term_list = []
for t in it['keywords']:
term_list.append(t)
thes = Thesaurus.make(thes_name)
if thes:
kw = ThesaurusTerms(thesaurus=thes, terms=term_list)
keywords_dict.update({thes_name:kw})
thes_title, thes_version = thes_title.split(',')
except:
pass
thes_version = None
else:
thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1', thes_version)
# Note thes_version can be used to enforce a specific thesaurus version
try:
thes = Thesaurus.lookup(title=thes_title, for_keywords=True)
except ValueError:
thes = None
# Treat present keywords depending on if they belong to a thesaurus
if thes:
# Treat as thesaurus terms; discard unknown terms
terms = []
for keyword in it['keywords']:
term = thes.vocabulary.by_value.get(keyword)
if not term:
term = thes.vocabulary.by_token.get(keyword)
if term:
terms.append(term.value)
keywords[thes.name] = ThesaurusTerms(thesaurus=thes, terms=terms)
else:
# Treat as free keywords (not really a thesaurus)
vocab_date = to_date(it['thesaurus']['date'])
vocab_datetype = it['thesaurus']['datetype']
for keyword in it['keywords']:
free_keywords.append(FreeKeyword(
value = keyword,
reference_date = vocab_date,
date_type = vocab_datetype,
originating_vocabulary = thes_title))

temporal_extent = []
if md.identification.temporalextent_start or md.identification.temporalextent_end:
temporal_extent = [TemporalExtent(
Expand Down Expand Up @@ -222,13 +246,6 @@ def to_resp_party(alist):
elif it.type == 'revision':
revision_date = to_date(it.date)

#if not creation_date:
# raise Exception('creation date not present','')
#elif not publication_date:
# raise Exception('publication date not present','')
#elif not revision_date:
# raise Exception('revision date not present','')

spatial_list = []

if len(md.identification.distance) != len(md.identification.uom):
Expand Down Expand Up @@ -291,7 +308,7 @@ def to_resp_party(alist):

obj = InspireMetadata()

obj.contact = to_resp_party(md.contact)
obj.contact = to_responsible_party(md.contact)
obj.datestamp = datestamp
obj.languagecode = md.languagecode
obj.title = unicode(md.identification.title)
Expand All @@ -300,7 +317,8 @@ def to_resp_party(alist):
obj.locator = url_list
#obj.resource_language = md.identification.resourcelanguage
obj.topic_category = topic_list
obj.keywords = keywords_dict
obj.keywords = keywords
obj.free_keywords = free_keywords
obj.bounding_box = bbox
obj.temporal_extent = temporal_extent
obj.creation_date = creation_date
Expand All @@ -311,7 +329,7 @@ def to_resp_party(alist):
obj.conformity = conf_list
obj.access_constraints = limit_list
obj.limitations = constr_list
obj.responsible_party = to_resp_party(md.identification.contact)
obj.responsible_party = to_responsible_party(md.identification.contact)

return obj

39 changes: 24 additions & 15 deletions ckanext/publicamundi/lib/metadata/types/thesaurus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,40 @@ class Thesaurus(Object):

@property
def vocabulary(self):
spec = vocabularies.get_by_name(self.name)
return spec.get('vocabulary') if spec else None
vocab = vocabularies.get_by_name(self.name)
return vocab.get('vocabulary') if vocab else None

# Factory for Thesaurus

@classmethod
def make(cls, name):
'''Create a new Thesaurus instance from it's machine-name name.
The metadata for this thesaurus are queried from vocabularies module.
def lookup(cls, name=None, title=None, for_keywords=False):
'''Lookup by name or title and return a Thesaurus instance.
Note: Maybe rename this class-method to lookup
This is a factory method that tries to instantiate a Thesaurus object
from a collection of well-known (mostly related to INSPIRE) vocabularies.
'''
spec = vocabularies.get_by_name(name)
if spec:

vocab = None

if (name is None) and title:
name = vocabularies.normalize_thesaurus_title(title, for_keywords)

if name:
vocab = vocabularies.get_by_name(name)
else:
raise ValueError('Expected a name/title lookup')

if vocab:
kwargs = {
'title': spec.get('title'),
'name': spec.get('name'),
'reference_date': spec.get('reference_date'),
'version' : spec.get('version'),
'date_type': spec.get('date_type'),
'title': vocab.get('title'),
'name': vocab.get('name'),
'reference_date': vocab.get('reference_date'),
'version' : vocab.get('version'),
'date_type': vocab.get('date_type'),
}
return cls(**kwargs)
else:
raise ValueError(
'Cannot find an INSPIRE thesaurus named "%s"' %(name))
raise ValueError('Cannot find a thesaurus named "%s"' %(name))

@object_null_adapter()
class ThesaurusTerms(Object):
Expand Down
8 changes: 3 additions & 5 deletions ckanext/publicamundi/lib/metadata/vocabularies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@

# Import loader

from ckanext.publicamundi.lib.metadata.vocabularies import json_loader

munge = json_loader.munge
from ckanext.publicamundi.lib.metadata.vocabularies.json_loader import (
make_vocabularies, normalize_keyword, normalize_thesaurus_title)

def _update(data_file, name_prefix='', overwrite=False):
'''Update the module-global vocabularies from external JSON data.
'''

for name, desc in json_loader.make_vocabularies(data_file):
for name, desc in make_vocabularies(data_file):
assert overwrite or not (name in vocabularies), (
'A vocabulary named %r is allready loaded' % (name))
vocabularies[name_prefix + name] = desc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Babel string extraction functions

def extract_json(fileobj, keywords, comment_tags, options):
"""Extract messages from XXX files.
"""Extract messages from files.
:param fileobj: the file-like object the messages should be extracted from
:param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions
:param comment_tags: a list of translator tags to search for and include in the results
Expand Down
Loading

0 comments on commit 66db577

Please sign in to comment.