Skip to content

Commit

Permalink
chore: move functions out of plugin into utils
Browse files Browse the repository at this point in the history
  • Loading branch information
duttonw committed Dec 11, 2024
1 parent f001c86 commit 0808555
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 107 deletions.
116 changes: 10 additions & 106 deletions ckanext/validation/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,25 @@

import json
import logging
import cgi


from werkzeug.datastructures import FileStorage as FlaskFileStorage

import ckan.plugins as p
import ckantoolkit as t

from . import settings, validators
from . import settings, utils, validators
from .helpers import _get_helpers
from ckanext.validation.model import tables_exist
from .logic import action, auth
from ckanext.validation.helpers import (
get_validation_badge,
validation_extract_report_from_errors,
dump_json_value,
bootstrap_version,
validation_dict,
use_webassets,
)
from ckanext.validation.validators import (
resource_schema_validator,
validation_options_validator,
)

from ckanext.validation.utils import (
get_create_mode_from_config,
get_update_mode_from_config,
)
from ckanext.validation.interfaces import IDataValidation
from ckanext.validation import views, cli

ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)

log = logging.getLogger(__name__)


Expand Down Expand Up @@ -99,38 +87,6 @@ def get_validators(self):
resources_to_validate = {}
packages_to_skip = {}

def _process_schema_fields(self, data_dict):
u'''
Normalize the different ways of providing the `schema` field
1. If `schema_upload` is provided and it's a valid file, the contents
are read into `schema`.
2. If `schema_url` is provided and looks like a valid URL, it's copied
to `schema`
3. If `schema_json` is provided, it's copied to `schema`.
All the 3 `schema_*` fields are removed from the data_dict.
Note that the data_dict still needs to pass validation
'''

schema_upload = data_dict.pop(u'schema_upload', None)
schema_url = data_dict.pop(u'schema_url', None)
schema_json = data_dict.pop(u'schema_json', None)
if isinstance(schema_upload, ALLOWED_UPLOAD_TYPES):
uploaded_file = _get_underlying_file(schema_upload)
data_dict[u'schema'] = uploaded_file.read()
if isinstance(data_dict["schema"], (bytes, bytearray)):
data_dict["schema"] = data_dict["schema"].decode()
elif schema_url:

if (not isinstance(schema_url, str) or
not schema_url.lower()[:4] == u'http'):
raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
data_dict[u'schema'] = schema_url
elif schema_json:
data_dict[u'schema'] = schema_json

return data_dict

# CKAN < 2.10
def before_create(self, context, data_dict):
Expand All @@ -142,7 +98,7 @@ def before_resource_create(self, context, data_dict):
is_dataset = self._data_dict_is_dataset(data_dict)
if not is_dataset:
context["_resource_create_call"] = True
return self._process_schema_fields(data_dict)
return utils.process_schema_fields(data_dict)

# CKAN < 2.10
def after_create(self, context, data_dict):
Expand Down Expand Up @@ -196,7 +152,7 @@ def _handle_validation_for_resource(self, context, resource):
log.debug('Skipping validation for resource %s', resource['id'])
return

_run_async_validation(resource[u'id'])
utils._run_async_validation(resource[u'id'])

# CKAN < 2.10
def before_update(self, context, current_resource, updated_resource):
Expand All @@ -205,7 +161,7 @@ def before_update(self, context, current_resource, updated_resource):
# CKAN >= 2.10
def before_resource_update(self, context, current_resource, updated_resource):

updated_resource = self._process_schema_fields(updated_resource)
updated_resource = utils.process_schema_fields(updated_resource)

# the call originates from a resource API, so don't validate the entire package
package_id = updated_resource.get('package_id')
Expand Down Expand Up @@ -306,10 +262,10 @@ def after_resource_update(self, context, data_dict):

del self.resources_to_validate[resource_id]

_run_async_validation(resource_id)
utils._run_async_validation(resource_id)

if _should_remove_unsupported_resource_validation_reports(data_dict):
p.toolkit.enqueue_job(fn=_remove_unsupported_resource_validation_reports, args=[resource_id])
if utils._should_remove_unsupported_resource_validation_reports(data_dict):
p.toolkit.enqueue_job(fn=utils._remove_unsupported_resource_validation_reports, args=[resource_id])

# IPackageController

Expand All @@ -332,55 +288,3 @@ def before_dataset_index(self, index_dict):

return index_dict


def _run_async_validation(resource_id):

try:
t.get_action(u'resource_validation_run')(
{u'ignore_auth': True},
{u'resource_id': resource_id,
u'async': True})
except t.ValidationError as e:
log.warning(
u'Could not run validation for resource %s: %s',
resource_id, e)

def _get_underlying_file(wrapper):
if isinstance(wrapper, FlaskFileStorage):
return wrapper.stream
return wrapper.file


def _should_remove_unsupported_resource_validation_reports(res_dict):
if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
return False
return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
and (res_dict.get('url_type') == 'upload'
or not res_dict.get('url_type'))
and (t.h.asbool(res_dict.get('validation_status', False))
or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))


def _remove_unsupported_resource_validation_reports(resource_id):
"""
Callback to remove unsupported validation reports.
Controlled by config value: ckanext.validation.clean_validation_reports.
Double check the resource format. Only supported Validation formats should have validation reports.
If the resource format is not supported, we should delete the validation reports.
"""
context = {"ignore_auth": True}
try:
res = p.toolkit.get_action('resource_show')(context, {"id": resource_id})
except t.ObjectNotFound:
log.error('Resource %s does not exist.', resource_id)
return

if _should_remove_unsupported_resource_validation_reports(res):
log.info('Unsupported resource format "%s". Deleting validation reports for resource %s',
res.get(u'format', u''), res['id'])
try:
p.toolkit.get_action('resource_validation_delete')(context, {
"resource_id": res['id']})
log.info('Validation reports deleted for resource %s', res['id'])
except t.ObjectNotFound:
log.error('Validation reports for resource %s do not exist', res['id'])
106 changes: 105 additions & 1 deletion ckanext/validation/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,119 @@
import os
import logging
import cgi

from ckan.plugins import PluginImplementations
from ckan.lib.uploader import ResourceUpload
from ckantoolkit import config, asbool

from ckanext.validation.interfaces import IPipeValidation


log = logging.getLogger(__name__)

from . import settings
import ckan.plugins as p
import ckantoolkit as t

from werkzeug.datastructures import FileStorage as FlaskFileStorage
ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)

def process_schema_fields(data_dict):
u'''
Normalize the different ways of providing the `schema` field
1. If `schema_upload` is provided and it's a valid file, the contents
are read into `schema`.
2. If `schema_url` is provided and looks like a valid URL, it's copied
to `schema`
3. If `schema_json` is provided, it's copied to `schema`.
All the 3 `schema_*` fields are removed from the data_dict.
Note that the data_dict still needs to pass validation
'''

schema_upload = data_dict.pop(u'schema_upload', None)
schema_url = data_dict.pop(u'schema_url', None)
schema_json = data_dict.pop(u'schema_json', None)

if isinstance(schema_upload, ALLOWED_UPLOAD_TYPES):
uploaded_file = _get_underlying_file(schema_upload)
data_dict[u'schema'] = uploaded_file.read()
if isinstance(data_dict["schema"], (bytes, bytearray)):
data_dict["schema"] = data_dict["schema"].decode()
elif schema_url:

if (not isinstance(schema_url, str) or
not schema_url.lower()[:4] == u'http'):
raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
data_dict[u'schema'] = schema_url
elif schema_json:
data_dict[u'schema'] = schema_json

return data_dict

def _get_underlying_file(wrapper):
if isinstance(wrapper, FlaskFileStorage):
return wrapper.stream
return wrapper.file


def _should_remove_unsupported_resource_validation_reports(res_dict):
if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
return False
return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
and (res_dict.get('url_type') == 'upload'
or not res_dict.get('url_type'))
and (t.h.asbool(res_dict.get('validation_status', False))
or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))


def _run_async_validation(resource_id):

try:
t.get_action(u'resource_validation_run')(
{u'ignore_auth': True},
{u'resource_id': resource_id,
u'async': True})
except t.ValidationError as e:
log.warning(
u'Could not run validation for resource %s: %s',
resource_id, e)


def _should_remove_unsupported_resource_validation_reports(res_dict):
if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
return False
return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
and (res_dict.get('url_type') == 'upload'
or not res_dict.get('url_type'))
and (t.h.asbool(res_dict.get('validation_status', False))
or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))


def _remove_unsupported_resource_validation_reports(resource_id):
"""
Callback to remove unsupported validation reports.
Controlled by config value: ckanext.validation.clean_validation_reports.
Double check the resource format. Only supported Validation formats should have validation reports.
If the resource format is not supported, we should delete the validation reports.
"""
context = {"ignore_auth": True}
try:
res = p.toolkit.get_action('resource_show')(context, {"id": resource_id})
except t.ObjectNotFound:
log.error('Resource %s does not exist.', resource_id)
return

if _should_remove_unsupported_resource_validation_reports(res):
log.info('Unsupported resource format "%s". Deleting validation reports for resource %s',
res.get(u'format', u''), res['id'])
try:
p.toolkit.get_action('resource_validation_delete')(context, {
"resource_id": res['id']})
log.info('Validation reports deleted for resource %s', res['id'])
except t.ObjectNotFound:
log.error('Validation reports for resource %s do not exist', res['id'])


def get_update_mode_from_config():
if asbool(
Expand Down

0 comments on commit 0808555

Please sign in to comment.