diff --git a/ckanext/validation/jobs.py b/ckanext/validation/jobs.py index f0d658ec..be8d3919 100644 --- a/ckanext/validation/jobs.py +++ b/ckanext/validation/jobs.py @@ -8,6 +8,7 @@ import requests from sqlalchemy.orm.exc import NoResultFound from frictionless import validate, system, Report, Schema, Dialect, Check +from six import string_types from ckan.model import Session import ckan.lib.uploader as uploader @@ -15,11 +16,7 @@ import ckantoolkit as t from ckanext.validation.model import Validation -from ckanext.validation.utils import ( - get_update_mode_from_config, - send_validation_report, - validation_dictize, -) +from ckanext.validation.utils log = logging.getLogger(__name__) @@ -27,7 +24,17 @@ def run_validation_job(resource): - log.debug('Validating resource %s', resource['id']) + # handle either a resource dict or just an ID + # ID is more efficient, as resource dicts can be very large + if isinstance(resource, string_types): + log.debug(u'run_validation_job: calling resource_show: %s', resource) + resource = t.get_action('resource_show')({'ignore_auth': True}, {'id': resource}) + + resource_id = resource.get('id') + if resource_id: + log.debug(u'Validating resource: %s', resource_id) + else: + log.debug(u'Validating resource dict: %s', resource) try: validation = Session.query(Validation).filter( @@ -59,37 +66,38 @@ def run_validation_job(resource): {'ignore_auth': True}, {'id': resource['package_id']}) source = None - if resource.get('url_type') == 'upload': + if resource.get(u'url_type') == u'upload': upload = uploader.get_resource_uploader(resource) if isinstance(upload, uploader.ResourceUpload): - source = upload.get_path(resource['id']) + source = upload.get_path(resource[u'id']) else: # Upload is not the default implementation (ie it's a cloud storage # implementation) pass_auth_header = t.asbool( - t.config.get('ckanext.validation.pass_auth_header', True)) - if dataset['private'] and pass_auth_header: + t.config.get(u'ckanext.validation.pass_auth_header', True)) + if dataset[u'private'] and pass_auth_header: s = requests.Session() s.headers.update({ - 'Authorization': t.config.get( - 'ckanext.validation.pass_auth_header_value', - _get_site_user_api_key()) + u'Authorization': t.config.get( + u'ckanext.validation.pass_auth_header_value', + utils.get_site_user_api_key()) }) - options['http_session'] = s + options[u'http_session'] = s if not source: - source = resource['url'] - - schema = resource.get('schema') - if schema: - if isinstance(schema, str): - if schema.startswith('http'): - r = requests.get(schema) - schema = r.json() + source = resource[u'url'] + + schema = resource.get(u'schema') + if schema and isinstance(schema, string_types): + if schema.startswith('http'): + r = requests.get(schema) + schema = r.json() + else: schema = json.loads(schema) - _format = resource['format'].lower() + _format = resource[u'format'].lower() + report = _validate_table(source, _format=_format, schema=schema, **options) # Hide uploaded files @@ -127,30 +135,28 @@ def run_validation_job(resource): 'validation_timestamp': validation.finished.isoformat(), } - if get_update_mode_from_config() == 'sync': + if utils.get_update_mode_from_config() == 'sync': data_dict['_skip_next_validation'] = True, - patch_context = { - 'ignore_auth': True, + patch_context = + t.get_action('resource_patch')( + {'ignore_auth': True, 'user': t.get_action('get_site_user')({'ignore_auth': True})['name'], - '_validation_performed': True - } - t.get_action('resource_patch')(patch_context, data_dict) - send_validation_report(validation_dictize(validation)) - + '_validation_performed': True}, + data_dict) + utils.send_validation_report(utils.validation_dictize(validation)) - -def _validate_table(source, _format='csv', schema=None, **options): +def _validate_table(source, _format=u'csv', schema=None, **options): # This option is needed to allow Frictionless Framework to validate absolute paths frictionless_context = { 'trusted': True } http_session = options.pop('http_session', None) or requests.Session() - use_proxy = 'ckan.download_proxy' in t.config + use_proxy = 'ckan.download_proxy' in t.config if use_proxy: proxy = t.config.get('ckan.download_proxy') - log.debug('Download resource for validation via proxy: %s', proxy) + log.debug(u'Download resource for validation via proxy: %s', proxy) http_session.proxies.update({'http': proxy, 'https': proxy}) frictionless_context['http_session'] = http_session @@ -168,14 +174,6 @@ def _validate_table(source, _format='csv', schema=None, **options): with system.use_context(**frictionless_context): report = validate(source, format=_format, schema=resource_schema, **options) - log.debug('Validating source: %s', source) + log.debug(u'Validating source: %s', source) return report - - -def _get_site_user_api_key(): - - site_user_name = t.get_action('get_site_user')({'ignore_auth': True}, {}) - site_user = t.get_action('get_site_user')( - {'ignore_auth': True}, {'id': site_user_name}) - return site_user['apikey'] diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py index a47648ce..2a92e4bc 100644 --- a/ckanext/validation/plugin.py +++ b/ckanext/validation/plugin.py @@ -6,12 +6,12 @@ import ckan.plugins as p -import ckantoolkit as t +import ckantoolkit as tk -from . import settings, utils, validators +from . import settings as s, utils, validators from .helpers import _get_helpers -from ckanext.validation.model import tables_exist from .logic import action, auth +from .model import tables_exist from ckanext.validation.utils import ( get_create_mode_from_config, @@ -58,9 +58,9 @@ def update_config(self, config_): else: log.debug(u'Validation tables exist') - t.add_template_directory(config_, u'templates') - t.add_public_directory(config_, u'public') - t.add_resource(u'webassets', 'ckanext-validation') + tk.add_template_directory(config_, u'templates') + tk.add_public_directory(config_, u'public') + tk.add_resource(u'webassets', 'ckanext-validation') # IActions @@ -141,7 +141,7 @@ def _handle_validation_for_resource(self, context, resource): ) and ( # Make sure format is supported resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS + s.SUPPORTED_FORMATS )): needs_validation = True @@ -166,7 +166,7 @@ def before_resource_update(self, context, current_resource, updated_resource): # the call originates from a resource API, so don't validate the entire package package_id = updated_resource.get('package_id') if not package_id: - existing_resource = t.get_action('resource_show')( + existing_resource = tk.get_action('resource_show')( context={'ignore_auth': True}, data_dict={'id': updated_resource['id']}) if existing_resource: package_id = existing_resource['package_id'] @@ -190,7 +190,7 @@ def before_resource_update(self, context, current_resource, updated_resource): ) and ( # Make sure format is supported updated_resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS + s.SUPPORTED_FORMATS )): needs_validation = True @@ -287,4 +287,3 @@ def before_dataset_index(self, index_dict): index_dict['vocab_validation_status'] = res_status return index_dict - diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py index f5cdb9fe..4c0881a7 100644 --- a/ckanext/validation/utils.py +++ b/ckanext/validation/utils.py @@ -80,16 +80,6 @@ def _run_async_validation(resource_id): resource_id, e) -def _should_remove_unsupported_resource_validation_reports(res_dict): - if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)): - return False - return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS - and (res_dict.get('url_type') == 'upload' - or not res_dict.get('url_type')) - and (t.h.asbool(res_dict.get('validation_status', False)) - or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) - - def _remove_unsupported_resource_validation_reports(resource_id): """ Callback to remove unsupported validation reports.