Skip to content

Commit

Permalink
Merge pull request #282 from GSA/collection
Browse files Browse the repository at this point in the history
Collection
  • Loading branch information
FuhuXia authored Dec 11, 2024
2 parents 5ebb5e8 + 7510734 commit 2a97bb2
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 18 deletions.
48 changes: 41 additions & 7 deletions ckanext/geodatagov/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging

from ckan import model
from ckan import plugins as p
from ckanext.harvest.model import HarvestSource
from ckan.logic import NotFound, NotAuthorized
from ckan.logic import NotFound, NotAuthorized, get_action

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -59,12 +60,45 @@ def get_harvest_source_config(harvester_id):
return source_config


def get_collection_package(collection_package_id):
    """Load a collection's package dict by its package id.

    Calls the ``package_show`` action with an empty context.

    :param collection_package_id: id passed to ``package_show`` as ``id``
    :returns: the package dict, or ``None`` when ``package_show`` raises
        ``NotFound`` or ``NotAuthorized``
    """
    try:
        show_package = p.toolkit.get_action('package_show')
        found = show_package({}, {'id': collection_package_id})
    except (NotFound, NotAuthorized):
        # Best effort: a missing or unreadable collection yields None.
        found = None
    return found
def count_collection_package(source_id, identifier):
    """Count the datasets that are part of a collection.

    Runs a zero-row ``package_search`` filtered on the harvest source and on
    ``isPartOf`` equal to *identifier*. The ``include_collection:true`` marker
    is included so that ``before_dataset_search`` does not append its
    "hide collection children" filter to this query.

    :param source_id: harvest source id of the candidate collection parent
    :param identifier: identifier of the candidate collection parent
    :returns: number of matching datasets; ``0`` when either argument is
        empty or the search reports no count
    """
    # The package template calls this for every dataset and passes '' when an
    # extra is missing; an empty value would produce a malformed Solr filter
    # such as "harvest_source_id: isPartOf: ...".
    if not source_id or not identifier:
        return 0

    def quoted(value):
        # Phrase-quote the value so characters like ':' '/' or spaces inside
        # an identifier are not interpreted as Solr query syntax.
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    context = {'model': model, 'session': model.Session}
    search_params = {
        'fq': (
            f'harvest_source_id:{quoted(source_id)} '
            f'isPartOf:{quoted(identifier)} '
            'include_collection:true'
        ),
        # Only the total is needed, so ask for no result rows.
        'rows': 0,
    }

    search_result = get_action('package_search')(context, search_params)

    return search_result.get('count') or 0


def get_collection_package(source_id, identifier):
    """Return the package dict of a collection parent, or ``None``.

    Looks the parent up with ``package_search`` (harvest source id plus
    identifier, one row) and then loads the first hit through
    ``package_show``.

    :param source_id: harvest source id the collection parent belongs to
    :param identifier: identifier of the collection parent
    :returns: the package dict, or ``None`` when an argument is empty, the
        search has no results, or ``package_show`` raises ``NotFound`` or
        ``NotAuthorized``
    """
    # An empty value would produce a malformed Solr filter
    # ("harvest_source_id: identifier:"), so there is nothing to look up.
    if not source_id or not identifier:
        return None

    def quoted(value):
        # Phrase-quote the value so characters like ':' '/' or spaces inside
        # an identifier are not interpreted as Solr query syntax.
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    context = {'model': model, 'session': model.Session}
    search_params = {
        'fq': (
            f'harvest_source_id:{quoted(source_id)} '
            f'identifier:{quoted(identifier)}'
        ),
        'rows': 1,
    }

    search_result = get_action('package_search')(context, search_params)

    results = search_result.get('results')
    if not results:
        return None

    try:
        return p.toolkit.get_action('package_show')(
            context,
            {'id': results[0]['id']}
        )
    except (NotFound, NotAuthorized):
        # Best effort: a parent that vanished or is not readable is reported
        # the same way as "no parent found".
        return None


def string(value):
Expand Down
14 changes: 13 additions & 1 deletion ckanext/geodatagov/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,11 +499,22 @@ def before_dataset_search(self, search_params):
pattern = r'collection_info:"([^"]+?) ([^"]+)"'
fq = re.sub(pattern, r'harvest_source_id:"\1" isPartOf:"\2"', fq)
log.info('FQ changed for collection_info')
elif 'bulk_process' not in path:
elif 'bulk_process' not in path and 'include_collection' not in fq:
# hide collection's children datasets from regular search
fq += ' -isPartOf:["" TO *]'
log.info('Added FQ to hide collection')

# fq comes in as a string such as '(a:1 b:"2" c:["" to *])'
# remove string include_collection=true from fq, if found.
# Any other value of include_collection is left in fq as a search term that returns no results.
pattern = r'include_collection:"?([^",\s)]+)"?'
match = re.search(pattern, fq, re.IGNORECASE)
if match and match.group(1).lower() == 'true':
fq = re.sub(pattern, '', fq, flags=re.IGNORECASE).strip()
# if include_collection=true is the only fq, we could end up with a set of parentheses.
if fq == "()":
fq = ""

search_params['fq'] = fq
return search_params

Expand Down Expand Up @@ -536,6 +547,7 @@ def get_helpers(self):
'get_harvest_source_type': geodatagov_helpers.get_harvest_source_type,
'get_harvest_source_config': geodatagov_helpers.get_harvest_source_config,
'get_collection_package': geodatagov_helpers.get_collection_package,
'count_collection_package': geodatagov_helpers.count_collection_package,
}

# IActions
Expand Down
37 changes: 27 additions & 10 deletions ckanext/geodatagov/templates/package/read.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,42 @@
{% set pkg_dict = c.pkg_dict %}

{% block collection_resources %}
{% set collection_package_id = h.get_pkg_dict_extra(c.pkg_dict, 'collection_package_id', '') %}
{% if h.get_pkg_dict_extra(c.pkg_dict, 'collection_metadata', '') %}
<section class="module-content">
<h3>{{ _('Collection') }}</h3>
<p>{{ _('This dataset is a collection of other datasets.') }}</p>
<p><a href="{{ h.url_for('search', collection_package_id=pkg_dict.id) }}" class="btn-collection">{{ _('Search datasets within this collection') }}</a></p>
</section>
{% elif collection_package_id %}
{% set collection_package = h.get_collection_package(collection_package_id) %}

{% set identifier = h.get_pkg_dict_extra(c.pkg_dict, 'Identifier', '') %}
{% set collection_sourceid = h.get_pkg_dict_extra(c.pkg_dict, 'harvest_source_id', '') %}
{% set collection_ispartof = h.get_pkg_dict_extra(c.pkg_dict, 'isPartOf', '') %}

{#
1. we check each dataset to see if it is a collection parent using its identifier.
if count_collection returns a value other than 0, then we know that this dataset is a collection parent.

2. we use collection_ispartof to tell if this dataset is a collection child.
if collection_package returns a dataset, then we know its collection parent is found.
#}

{% set count_collection = h.count_collection_package(collection_sourceid, identifier) %}

{% if count_collection or collection_ispartof %}
<section class="module-content">
<h3>{{ _('Collection') }}</h3>
{% if count_collection %}
{% set collection_info = collection_sourceid ~ ' ' ~ identifier %}
<p>This dataset is a collection of {{ count_collection }} other datasets.</p>
<p><a href="{{ h.url_for('search', collection_info=collection_info) }}" class="btn-collection">{{ _('Search datasets within this collection') }}</a></p>
{% endif %}

{% if collection_ispartof %}
{% set collection_info = collection_sourceid ~ ' ' ~ collection_ispartof %}
{% set collection_package = h.get_collection_package(collection_sourceid, collection_ispartof) %}
{% if collection_package %}
<p>{{ _('This dataset is part of the following collection:') }}</p>
<ul class="dataset-list unstyled">
{% snippet "snippets/package_item.html", package=collection_package, truncate=75 %}
</ul>
{% else %}
<p>{{ _('This dataset is part of a deleted collection.') }}</p>
<p><a href="{{ h.url_for('search', collection_package_id=collection_package_id) }}" class="btn-collection">{{ _('Search other datasets within the same collection') }}</a></p>
<p><a href="{{ h.url_for('search', collection_info=collection_info) }}" class="btn-collection">{{ _('Search other datasets within the same collection') }}</a></p>
{% endif %}
{% endif %}
</section>
{% endif %}
Expand Down

0 comments on commit 2a97bb2

Please sign in to comment.