Skip to content

Commit

Permalink
chore: cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
duttonw committed Dec 11, 2024
1 parent 0808555 commit 0750ad6
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 64 deletions.
86 changes: 42 additions & 44 deletions ckanext/validation/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,33 @@
import requests
from sqlalchemy.orm.exc import NoResultFound
from frictionless import validate, system, Report, Schema, Dialect, Check
from six import string_types

from ckan.model import Session
import ckan.lib.uploader as uploader

import ckantoolkit as t

from ckanext.validation.model import Validation
from ckanext.validation.utils import (
get_update_mode_from_config,
send_validation_report,
validation_dictize,
)
from ckanext.validation.utils


log = logging.getLogger(__name__)


def run_validation_job(resource):

log.debug('Validating resource %s', resource['id'])
# handle either a resource dict or just an ID
# ID is more efficient, as resource dicts can be very large
if isinstance(resource, string_types):
log.debug(u'run_validation_job: calling resource_show: %s', resource)
resource = t.get_action('resource_show')({'ignore_auth': True}, {'id': resource})

resource_id = resource.get('id')
if resource_id:
log.debug(u'Validating resource: %s', resource_id)
else:
log.debug(u'Validating resource dict: %s', resource)

try:
validation = Session.query(Validation).filter(
Expand Down Expand Up @@ -59,37 +66,38 @@ def run_validation_job(resource):
{'ignore_auth': True}, {'id': resource['package_id']})

source = None
if resource.get('url_type') == 'upload':
if resource.get(u'url_type') == u'upload':
upload = uploader.get_resource_uploader(resource)
if isinstance(upload, uploader.ResourceUpload):
source = upload.get_path(resource['id'])
source = upload.get_path(resource[u'id'])
else:
# Upload is not the default implementation (ie it's a cloud storage
# implementation)
pass_auth_header = t.asbool(
t.config.get('ckanext.validation.pass_auth_header', True))
if dataset['private'] and pass_auth_header:
t.config.get(u'ckanext.validation.pass_auth_header', True))
if dataset[u'private'] and pass_auth_header:
s = requests.Session()
s.headers.update({
'Authorization': t.config.get(
'ckanext.validation.pass_auth_header_value',
_get_site_user_api_key())
u'Authorization': t.config.get(
u'ckanext.validation.pass_auth_header_value',
utils.get_site_user_api_key())
})

options['http_session'] = s
options[u'http_session'] = s

if not source:
source = resource['url']

schema = resource.get('schema')
if schema:
if isinstance(schema, str):
if schema.startswith('http'):
r = requests.get(schema)
schema = r.json()
source = resource[u'url']

schema = resource.get(u'schema')
if schema and isinstance(schema, string_types):
if schema.startswith('http'):
r = requests.get(schema)
schema = r.json()
else:
schema = json.loads(schema)

_format = resource['format'].lower()
_format = resource[u'format'].lower()

report = _validate_table(source, _format=_format, schema=schema, **options)

# Hide uploaded files
Expand Down Expand Up @@ -127,30 +135,28 @@ def run_validation_job(resource):
'validation_timestamp': validation.finished.isoformat(),
}

if get_update_mode_from_config() == 'sync':
if utils.get_update_mode_from_config() == 'sync':
data_dict['_skip_next_validation'] = True,

patch_context = {
'ignore_auth': True,
patch_context =
t.get_action('resource_patch')(
{'ignore_auth': True,
'user': t.get_action('get_site_user')({'ignore_auth': True})['name'],
'_validation_performed': True
}
t.get_action('resource_patch')(patch_context, data_dict)
send_validation_report(validation_dictize(validation))

'_validation_performed': True},
data_dict)
utils.send_validation_report(utils.validation_dictize(validation))



def _validate_table(source, _format='csv', schema=None, **options):
def _validate_table(source, _format=u'csv', schema=None, **options):

# This option is needed to allow Frictionless Framework to validate absolute paths
frictionless_context = { 'trusted': True }
http_session = options.pop('http_session', None) or requests.Session()
use_proxy = 'ckan.download_proxy' in t.config

use_proxy = 'ckan.download_proxy' in t.config
if use_proxy:
proxy = t.config.get('ckan.download_proxy')
log.debug('Download resource for validation via proxy: %s', proxy)
log.debug(u'Download resource for validation via proxy: %s', proxy)
http_session.proxies.update({'http': proxy, 'https': proxy})

frictionless_context['http_session'] = http_session
Expand All @@ -168,14 +174,6 @@ def _validate_table(source, _format='csv', schema=None, **options):

with system.use_context(**frictionless_context):
report = validate(source, format=_format, schema=resource_schema, **options)
log.debug('Validating source: %s', source)
log.debug(u'Validating source: %s', source)

return report


def _get_site_user_api_key():

site_user_name = t.get_action('get_site_user')({'ignore_auth': True}, {})
site_user = t.get_action('get_site_user')(
{'ignore_auth': True}, {'id': site_user_name})
return site_user['apikey']
19 changes: 9 additions & 10 deletions ckanext/validation/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@


import ckan.plugins as p
import ckantoolkit as t
import ckantoolkit as tk

from . import settings, utils, validators
from . import settings as s, utils, validators
from .helpers import _get_helpers
from ckanext.validation.model import tables_exist
from .logic import action, auth
from .model import tables_exist

from ckanext.validation.utils import (
get_create_mode_from_config,
Expand Down Expand Up @@ -58,9 +58,9 @@ def update_config(self, config_):
else:
log.debug(u'Validation tables exist')

t.add_template_directory(config_, u'templates')
t.add_public_directory(config_, u'public')
t.add_resource(u'webassets', 'ckanext-validation')
tk.add_template_directory(config_, u'templates')
tk.add_public_directory(config_, u'public')
tk.add_resource(u'webassets', 'ckanext-validation')

# IActions

Expand Down Expand Up @@ -141,7 +141,7 @@ def _handle_validation_for_resource(self, context, resource):
) and (
# Make sure format is supported
resource.get(u'format', u'').lower() in
settings.SUPPORTED_FORMATS
s.SUPPORTED_FORMATS
)):
needs_validation = True

Expand All @@ -166,7 +166,7 @@ def before_resource_update(self, context, current_resource, updated_resource):
# the call originates from a resource API, so don't validate the entire package
package_id = updated_resource.get('package_id')
if not package_id:
existing_resource = t.get_action('resource_show')(
existing_resource = tk.get_action('resource_show')(
context={'ignore_auth': True}, data_dict={'id': updated_resource['id']})
if existing_resource:
package_id = existing_resource['package_id']
Expand All @@ -190,7 +190,7 @@ def before_resource_update(self, context, current_resource, updated_resource):
) and (
# Make sure format is supported
updated_resource.get(u'format', u'').lower() in
settings.SUPPORTED_FORMATS
s.SUPPORTED_FORMATS
)):
needs_validation = True

Expand Down Expand Up @@ -287,4 +287,3 @@ def before_dataset_index(self, index_dict):
index_dict['vocab_validation_status'] = res_status

return index_dict

10 changes: 0 additions & 10 deletions ckanext/validation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,6 @@ def _run_async_validation(resource_id):
resource_id, e)


def _should_remove_unsupported_resource_validation_reports(res_dict):
if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
return False
return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
and (res_dict.get('url_type') == 'upload'
or not res_dict.get('url_type'))
and (t.h.asbool(res_dict.get('validation_status', False))
or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))


def _remove_unsupported_resource_validation_reports(resource_id):
"""
Callback to remove unsupported validation reports.
Expand Down

0 comments on commit 0750ad6

Please sign in to comment.