diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..a89a787b --- /dev/null +++ b/.flake8 @@ -0,0 +1,20 @@ +[flake8] +# @see https://flake8.pycqa.org/en/latest/user/configuration.html?highlight=.flake8 + +exclude = + ckan + scripts + +# Extended output format. +format = pylint + +# Show the source of errors. +show_source = True + +max-complexity = 10 + +# List ignore rules one per line. +ignore = + E501 + C901 + W503 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..311db9ab --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,125 @@ +--- +#based on https://raw.githubusercontent.com/ckan/ckanext-scheming/master/.github/workflows/test.yml +# alternative https://github.com/ckan/ckan/blob/master/contrib/cookiecutter/ckan_extension/%7B%7Bcookiecutter.project%7D%7D/.github/workflows/test.yml +name: Tests +on: [push, pull_request] +env: + CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test + CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test + CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test + CKAN_SOLR_URL: http://solr:8983/solr/ckan + CKAN_REDIS_URL: redis://redis:6379/1 +jobs: + + + + + lint: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.6' + - name: Install requirements + run: pip install flake8 pycodestyle + - name: Check syntax + run: flake8 . 
--count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan + + test: + needs: lint + strategy: + matrix: + # not ready for CKAN 2.9 yet + # ckan-version: [2.9, 2.9-py2, 2.8, 2.7] + ckan-version: [2.8, 2.7] + env: + - { ARCHIVER_GIT_REPO: "ckan", ARCHIVER_BRANCH: "master", REPORT_GIT_REPO: "datagovuk", REPORT_BRANCH: "master" } + - { ARCHIVER_GIT_REPO: "qld-gov-au", ARCHIVER_BRANCH: "2.1.0-qgov.1", REPORT_GIT_REPO: "qld-gov-au", REPORT_BRANCH: "0.1" } + - { ARCHIVER_GIT_REPO: "qld-gov-au", ARCHIVER_BRANCH: "develop", REPORT_GIT_REPO: "qld-gov-au", REPORT_BRANCH: "develop" } + fail-fast: false + + name: CKAN ${{ matrix.ckan-version }} + runs-on: ubuntu-18.04 + container: + image: openknowledge/ckan-dev:${{ matrix.ckan-version }} + services: + solr: + image: ckan/ckan-solr-dev:${{ matrix.ckan-version }} + postgres: + image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + redis: + image: redis:3 + env: ${{ matrix.env }} + + steps: + - uses: actions/checkout@v2 + + - name: Install report and archiver plugins + run: | + echo "Installing dependency ckanext-report and its requirements..." + if [ ! -d ckanext-report ]; then + git clone --depth=50 --branch=$REPORT_BRANCH https://github.com/$REPORT_GIT_REPO/ckanext-report ckanext-report + fi + cd ckanext-report + if [ -f pip-requirements.txt ]; then + pip install -r pip-requirements.txt + fi + if [ -f dev-requirements.txt ]; then + pip install -r dev-requirements.txt + fi + + if [ -f requirements.txt ]; then + pip install -r requirements.txt + fi + pip install --no-deps -e . + cd .. + + echo "Installing dependency ckanext-archiver and its requirements..." + if [ ! 
-d ckanext-archiver ]; then + git clone --depth=50 --branch=$ARCHIVER_BRANCH https://github.com/$ARCHIVER_GIT_REPO/ckanext-archiver ckanext-archiver + fi + cd ckanext-archiver + if [ -f pip-requirements.txt ]; then + pip install -r pip-requirements.txt + fi + if [ -f dev-requirements.txt ]; then + pip install -r dev-requirements.txt + fi + + if [ -f requirements.txt ]; then + pip install -r requirements.txt + fi + pip install --no-deps -e . + cd .. + + - name: Install requirements + run: | + pip install -r dev-requirements.txt + if [ -f pip-requirements.txt ]; then + pip install -r pip-requirements.txt + fi + pip install -r requirements.txt + pip install -e . + apk add file + # Replace default path to CKAN core config file with the one on the container + sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini + + - name: Setup extension (CKAN >= 2.9) + if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }} + run: | + ckan -c test.ini db init + - name: Setup extension (CKAN < 2.9) + if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }} + run: | + paster --plugin=ckan db init -c test.ini + - name: Run all tests + run: | + nosetests --with-pylons=test.ini --with-coverage --cover-package=ckanext.qa --cover-inclusive --cover-erase --cover-tests + + diff --git a/bin/travis-build.bash b/bin/travis-build.bash index fa1b072c..1805f7ab 100644 --- a/bin/travis-build.bash +++ b/bin/travis-build.bash @@ -6,7 +6,16 @@ echo "This is travis-build.bash..." echo "Installing the packages that CKAN requires..." sudo apt-get update -qq -sudo apt-get install solr-jetty libcommons-fileupload-java +sudo apt-get install -y solr-jetty libcommons-fileupload-java + +ver=$(python -c"import sys; print(sys.version_info.major)") +if [ $ver -eq 2 ]; then + echo "python version 2" +elif [ $ver -eq 3 ]; then + echo "python version 3" +else + echo "Unknown python version: $ver" +fi echo "Upgrading libmagic for ckanext-qa..." 
# appears to upgrade it from 5.09-2 to 5.09-2ubuntu0.6 which seems to help the tests @@ -14,26 +23,45 @@ sudo apt-get install libmagic1 echo "Installing CKAN and its Python dependencies..." git clone https://github.com/ckan/ckan -cd ckan +pushd ckan + +if [ -f requirement-setuptools.txt ]; then + pip install -r requirement-setuptools.txt +fi -if [ $CKANVERSION == 'master' ] +if [ ${CKANVERSION:-master} == 'master' ] then echo "CKAN version: master" + export CKAN_MINOR_VERSION=100 else + export CKAN_MINOR_VERSION=${CKANVERSION##*.} CKAN_TAG=$(git tag | grep ^ckan-$CKANVERSION | sort --version-sort | tail -n 1) git checkout $CKAN_TAG echo "CKAN version: ${CKAN_TAG#ckan-}" fi -python setup.py develop -if [ -f requirements-py2.txt ] +if (( "$CKAN_MINOR_VERSION" >= 9 )) && (( $ver == 2 )) then pip install -r requirements-py2.txt else pip install -r requirements.txt fi pip install -r dev-requirements.txt --allow-all-external -cd - +python setup.py develop + +echo "Creating the PostgreSQL user and database..." +sudo -u postgres psql -c "CREATE USER ckan_default WITH PASSWORD 'pass';" +sudo -u postgres psql -c 'CREATE DATABASE ckan_test WITH OWNER ckan_default;' + +echo "Initialising the database..." +if (( $CKAN_MINOR_VERSION >= 9 )) +then + ckan -c test-core.ini db init +else + paster db init -c test-core.ini +fi + +popd echo "Setting up Solr..." # solr is multicore for tests on ckan master now, but it's easier to run tests @@ -44,32 +72,35 @@ printf "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml sudo service jetty restart -echo "Creating the PostgreSQL user and database..." -sudo -u postgres psql -c "CREATE USER ckan_default WITH PASSWORD 'pass';" -sudo -u postgres psql -c 'CREATE DATABASE ckan_test WITH OWNER ckan_default;' - -echo "Initialising the database..." -cd ckan -paster db init -c test-core.ini -cd - - echo "Installing dependency ckanext-report and its requirements..." 
-pip install -e git+https://github.com/datagovuk/ckanext-report.git#egg=ckanext-report +git clone --depth=50 https://github.com/datagovuk/ckanext-report.git +pushd ckanext-report + if [ -f requirements-py2.txt ] && [ $ver -eq 2 ]; then + pip install -r requirements-py2.txt + elif [ -f requirements.txt ]; then + pip install -r requirements.txt + fi + pip install --no-deps -e . +popd echo "Installing dependency ckanext-archiver and its requirements..." -git clone https://github.com/ckan/ckanext-archiver.git -cd ckanext-archiver -pip install -e . -pip install -r requirements.txt -cd - +git clone --depth=50 https://github.com/ckan/ckanext-archiver.git +pushd ckanext-archiver + if [ -f requirements-py2.txt ] && [ $ver -eq 2 ]; then + pip install -r requirements-py2.txt + elif [ -f requirements.txt ]; then + pip install -r requirements.txt + fi + pip install --no-deps -e . +popd echo "Installing ckanext-qa and its requirements..." -python setup.py develop pip install -r requirements.txt pip install -r dev-requirements.txt +python setup.py develop echo "Moving test-core.ini into a subdir..." -mkdir subdir -mv test-core.ini subdir +mkdir -p subdir +cp test-core.ini subdir echo "travis-build.bash is done." 
diff --git a/bin/travis-run.sh b/bin/travis-run.sh index 5c4022b7..1a6e7ef3 100644 --- a/bin/travis-run.sh +++ b/bin/travis-run.sh @@ -3,4 +3,4 @@ echo "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml sudo service jetty restart -nosetests --with-pylons=subdir/test-core.ini --with-coverage --cover-package=ckanext.archiver --cover-inclusive --cover-erase --cover-tests +nosetests --with-pylons=subdir/test-core.ini --with-coverage --cover-package=ckanext.qa --cover-inclusive --cover-erase --cover-tests diff --git a/ckanext/qa/__init__.py b/ckanext/qa/__init__.py index 53fd0507..21f26a28 100644 --- a/ckanext/qa/__init__.py +++ b/ckanext/qa/__init__.py @@ -6,4 +6,4 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) -__version__ = '2.0' +__version__ = '2.1.0-rc1' diff --git a/ckanext/qa/bin/common.py b/ckanext/qa/bin/common.py index 0ace784e..75e884fc 100644 --- a/ckanext/qa/bin/common.py +++ b/ckanext/qa/bin/common.py @@ -48,5 +48,5 @@ def get_resources(state='active', publisher_ref=None, resource_id=None, dataset_ resources = resources.filter(model.Resource.id == resource_id) criteria.append('Resource:%s' % resource_id) resources = resources.all() - print '%i resources (%s)' % (len(resources), ' '.join(criteria)) + print('%i resources (%s)' % (len(resources), ' '.join(criteria))) return resources diff --git a/ckanext/qa/bin/migrate_task_status.py b/ckanext/qa/bin/migrate_task_status.py index f57b1bf5..125190b1 100644 --- a/ckanext/qa/bin/migrate_task_status.py +++ b/ckanext/qa/bin/migrate_task_status.py @@ -19,7 +19,7 @@ START_OF_TIME = datetime.datetime(1980, 1, 1) END_OF_TIME = datetime.datetime(9999, 12, 31) -TODAY = datetime.datetime.now() +TODAY = datetime.datetime.utcnow() # NB put no CKAN imports here, or logging breaks @@ -59,7 +59,7 @@ def migrate(options): # time, so some timezone nonesense going on. Can't do much. 
archival = Archival.get_for_resource(res.id) if not archival: - print add_stat('QA but no Archival data', res, stats) + print(add_stat('QA but no Archival data', res, stats)) continue archival_date = archival.updated # the state of the resource was as it was archived on the date of @@ -112,10 +112,10 @@ def migrate(options): model.Session.add(qa) add_stat('Added to QA table', res, stats) - print 'Summary\n', stats.report() + print('Summary\n', stats.report()) if options.write: model.repo.commit_and_remove() - print 'Written' + print('Written') def add_stat(outcome, res, stats, extra_info=None): @@ -154,10 +154,10 @@ def date_str_to_datetime_or_none(date_str): if len(args) != 1: parser.error('Wrong number of arguments (%i)' % len(args)) config_ini = args[0] - print 'Loading CKAN config...' + print('Loading CKAN config...') common.load_config(config_ini) common.register_translator() - print 'Done' + print('Done') # Setup logging to print debug out for local only rootLogger = logging.getLogger() rootLogger.setLevel(logging.WARNING) diff --git a/ckanext/qa/bin/running_stats.py b/ckanext/qa/bin/running_stats.py index 947797aa..fbb0a635 100644 --- a/ckanext/qa/bin/running_stats.py +++ b/ckanext/qa/bin/running_stats.py @@ -14,7 +14,7 @@ package_stats.increment('deleted') else: package_stats.increment('not deleted') -print package_stats.report() +print(package_stats.report()) > deleted: 30 > not deleted: 70 @@ -26,7 +26,7 @@ package_stats.add('deleted', package.name) else: package_stats.add('not deleted' package.name) -print package_stats.report() +print(package_stats.report()) > deleted: 30 pollution-uk, flood-regions, river-quality, ... > not deleted: 70 spending-bristol, ... 
@@ -42,7 +42,7 @@ class StatsCount(dict): report_value_limit = 150 def __init__(self, *args, **kwargs): - self._start_time = datetime.datetime.now() + self._start_time = datetime.datetime.utcnow() super(StatsCount, self).__init__(*args, **kwargs) def _init_category(self, category): @@ -80,7 +80,7 @@ def report(self, indent=1, order_by_title=False, show_time_taken=True): lines = [indent_str + 'None'] if show_time_taken: - time_taken = datetime.datetime.now() - self._start_time + time_taken = datetime.datetime.utcnow() - self._start_time lines.append(indent_str + 'Time taken (h:m:s): %s' % time_taken) return '\n'.join(lines) @@ -110,6 +110,6 @@ def report_value(self, category): package_stats.add('Success', 'good3') package_stats.add('Success', 'good4') package_stats.add('Failure', 'bad1') - print package_stats.report() + print(package_stats.report()) - print StatsList().report() + print(StatsList().report()) diff --git a/ckanext/qa/commands.py b/ckanext/qa/commands.py index 992fb0cd..d0b0b5ea 100644 --- a/ckanext/qa/commands.py +++ b/ckanext/qa/commands.py @@ -1,4 +1,5 @@ import logging +import six import sys from sqlalchemy import or_ @@ -65,7 +66,7 @@ def command(self): Parse command line arguments and call appropriate method. 
""" if not self.args or self.args[0] in ['--help', '-h', 'help']: - print QACommand.__doc__ + print(QACommand.__doc__) return cmd = self.args[0] @@ -177,44 +178,44 @@ def sniff(self): from ckanext.qa.sniff_format import sniff_file_format if len(self.args) < 2: - print 'Not enough arguments', self.args + print('Not enough arguments', self.args) sys.exit(1) for filepath in self.args[1:]: format_ = sniff_file_format( filepath, logging.getLogger('ckanext.qa.sniffer')) if format_: - print 'Detected as: %s - %s' % (format_['display_name'], - filepath) + print('Detected as: %s - %s' % (format_['display_name'], + filepath)) else: - print 'ERROR: Could not recognise format of: %s' % filepath + print('ERROR: Could not recognise format of: %s' % filepath) def view(self, package_ref=None): from ckan import model q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') - print 'QA records - %i TaskStatus rows' % q.count() - print ' across %i Resources' % q.distinct('entity_id').count() + print('QA records - %i TaskStatus rows' % q.count()) + print(' across %i Resources' % q.distinct('entity_id').count()) if package_ref: pkg = model.Package.get(package_ref) - print 'Package %s %s' % (pkg.name, pkg.id) + print('Package %s %s' % (pkg.name, pkg.id)) for res in pkg.resources: - print 'Resource %s' % res.id + print('Resource %s' % res.id) for row in q.filter_by(entity_id=res.id): - print '* %s = %r error=%r' % (row.key, row.value, - row.error) + print('* %s = %r error=%r' % (row.key, row.value, + row.error)) def clean(self): from ckan import model - print 'Before:' + print('Before:') self.view() q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') q.delete() model.Session.commit() - print 'After:' + print('After:') self.view() def migrate1(self): @@ -223,32 +224,32 @@ def migrate1(self): q_status = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='status') - print '* %s with "status" will be deleted e.g. 
%s' % (q_status.count(), - q_status.first()) + print('* %s with "status" will be deleted e.g. %s' % (q_status.count(), + q_status.first())) q_failures = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score_failure_count') - print '* %s with openness_score_failure_count to be deleted e.g.\n%s'\ - % (q_failures.count(), q_failures.first()) + print('* %s with openness_score_failure_count to be deleted e.g.\n%s' + % (q_failures.count(), q_failures.first())) q_score = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score') - print '* %s with openness_score to migrate e.g.\n%s' % \ - (q_score.count(), q_score.first()) + print('* %s with openness_score to migrate e.g.\n%s' % + (q_score.count(), q_score.first())) q_reason = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score_reason') - print '* %s with openness_score_reason to migrate e.g.\n%s' % \ - (q_reason.count(), q_reason.first()) - raw_input('Press Enter to continue') + print('* %s with openness_score_reason to migrate e.g.\n%s' % + (q_reason.count(), q_reason.first())) + six.moves.input('Press Enter to continue') q_status.delete() model.Session.commit() - print '..."status" deleted' + print('..."status" deleted') q_failures.delete() model.Session.commit() - print '..."openness_score_failure_count" deleted' + print('..."openness_score_failure_count" deleted') for task_status in q_score: reason_task_status = q_reason \ @@ -265,15 +266,15 @@ def migrate1(self): 'reason': reason, 'format': None, 'is_broken': None, - }) + }) model.Session.commit() - print '..."openness_score" and "openness_score_reason" migrated' + print('..."openness_score" and "openness_score_reason" migrated') count = q_reason.count() q_reason.delete() model.Session.commit() - print '... %i remaining "openness_score_reason" deleted' % count + print('... 
%i remaining "openness_score_reason" deleted' % count) model.Session.flush() model.Session.remove() - print 'Migration succeeded' + print('Migration succeeded') diff --git a/ckanext/qa/controllers.py b/ckanext/qa/controllers.py index 493eed7f..4cedcbb5 100644 --- a/ckanext/qa/controllers.py +++ b/ckanext/qa/controllers.py @@ -102,7 +102,7 @@ def _check_link(self, url): result['mimetype'] = self._extract_mimetype(headers) result['size'] = headers.get('content-length', '') result['last_modified'] = self._parse_and_format_date(headers.get('last-modified', '')) - except LinkCheckerError, e: + except LinkCheckerError as e: result['url_errors'].append(str(e)) return result diff --git a/ckanext/qa/lib.py b/ckanext/qa/lib.py index 2113badd..712a8741 100644 --- a/ckanext/qa/lib.py +++ b/ckanext/qa/lib.py @@ -55,7 +55,7 @@ def resource_format_scores(): with open(json_filepath) as format_file: try: file_resource_formats = json.loads(format_file.read()) - except ValueError, e: + except ValueError as e: # includes simplejson.decoder.JSONDecodeError raise ValueError('Invalid JSON syntax in %s: %s' % (json_filepath, e)) @@ -90,7 +90,7 @@ def create_qa_update_package_task(package, queue): from pylons import config ckan_ini_filepath = os.path.abspath(config.__file__) - compat_enqueue('qa.update_package', tasks.update_package, queue, args=[ckan_ini_filepath, package.id]) + compat_enqueue('qa.update_package', tasks.update_package, queue, args=[ckan_ini_filepath, package.id]) log.debug('QA of package put into celery queue %s: %s', queue, package.name) diff --git a/ckanext/qa/logic/action.py b/ckanext/qa/logic/action.py index 8914c670..e176a7d4 100644 --- a/ckanext/qa/logic/action.py +++ b/ckanext/qa/logic/action.py @@ -30,7 +30,7 @@ def qa_resource_show(context, data_dict): 'name': pkg.name, 'title': pkg.title, 'id': res.id - } + } return_dict['archival'] = archival.as_dict() return_dict.update(qa.as_dict()) return return_dict diff --git a/ckanext/qa/model.py b/ckanext/qa/model.py 
index 9e6b97a1..94eafdc7 100644 --- a/ckanext/qa/model.py +++ b/ckanext/qa/model.py @@ -1,5 +1,6 @@ import uuid import datetime +import six from sqlalchemy import Column from sqlalchemy import types @@ -15,7 +16,7 @@ def make_uuid(): - return unicode(uuid.uuid4()) + return six.text_type(uuid.uuid4()) class QA(Base): @@ -35,12 +36,12 @@ class QA(Base): openness_score_reason = Column(types.UnicodeText) format = Column(types.UnicodeText) - created = Column(types.DateTime, default=datetime.datetime.now) - updated = Column(types.DateTime, default=datetime.datetime.now) + created = Column(types.DateTime, default=datetime.datetime.utcnow) + updated = Column(types.DateTime, default=datetime.datetime.utcnow) def __repr__(self): summary = 'score=%s format=%s' % (self.openness_score, self.format) - details = unicode(self.openness_score_reason).encode('unicode_escape') + details = six.text_type(self.openness_score_reason).encode('unicode_escape') package = model.Package.get(self.package_id) package_name = package.name if package else '?%s?' 
% self.package_id return '' % \ diff --git a/ckanext/qa/plugin.py b/ckanext/qa/plugin.py index 876459d1..cfd92766 100644 --- a/ckanext/qa/plugin.py +++ b/ckanext/qa/plugin.py @@ -67,7 +67,7 @@ def get_actions(self): return { 'qa_resource_show': action.qa_resource_show, 'qa_package_openness_show': action.qa_package_openness_show, - } + } # IAuthFunctions @@ -75,7 +75,7 @@ def get_auth_functions(self): return { 'qa_resource_show': auth.qa_resource_show, 'qa_package_openness_show': auth.qa_package_openness_show, - } + } # ITemplateHelpers @@ -85,7 +85,7 @@ def get_helpers(self): helpers.qa_openness_stars_resource_html, 'qa_openness_stars_dataset_html': helpers.qa_openness_stars_dataset_html, - } + } # IPackageController diff --git a/ckanext/qa/reports.py b/ckanext/qa/reports.py index c50b56de..9da09a64 100644 --- a/ckanext/qa/reports.py +++ b/ckanext/qa/reports.py @@ -72,7 +72,7 @@ def openness_index(include_sub_organizations=False): table = [] for org_name, org_counts in results.iteritems(): - total_stars = sum([k*v for k, v in org_counts['score_counts'].items() if k]) + total_stars = sum([k * v for k, v in org_counts['score_counts'].items() if k]) num_pkgs_scored = sum([v for k, v in org_counts['score_counts'].items() if k is not None]) average_stars = round(float(total_stars) / num_pkgs_scored, 1) \ @@ -82,7 +82,7 @@ def openness_index(include_sub_organizations=False): ('organization_name', org_name), ('total_stars', total_stars), ('average_stars', average_stars), - )) + )) row.update(jsonify_counter(org_counts['score_counts'])) table.append(row) @@ -136,10 +136,10 @@ def openness_for_organization(organization=None, include_sub_organizations=False ('organization_title', org.title), ('openness_score', qa['openness_score']), ('openness_score_reason', qa['openness_score_reason']), - ))) + ))) score_counts[qa['openness_score']] += 1 - total_stars = sum([k*v for k, v in score_counts.items() if k]) + total_stars = sum([k * v for k, v in score_counts.items() if k]) 
num_pkgs_with_stars = sum([v for k, v in score_counts.items() if k is not None]) average_stars = round(float(total_stars) / num_pkgs_with_stars, 1) \ @@ -172,7 +172,7 @@ def openness_report_combinations(): 'option_combinations': openness_report_combinations, 'generate': openness_report, 'template': 'report/openness.html', - } +} def jsonify_counter(counter): diff --git a/ckanext/qa/sniff_format.py b/ckanext/qa/sniff_format.py index 856447fa..2a3b8a20 100644 --- a/ckanext/qa/sniff_format.py +++ b/ckanext/qa/sniff_format.py @@ -1,7 +1,9 @@ +# encoding: utf-8 import re import zipfile import os from collections import defaultdict +import six import subprocess import StringIO @@ -16,6 +18,7 @@ log = logging.getLogger(__name__) + def sniff_file_format(filepath): '''For a given filepath, work out what file format it is. @@ -33,12 +36,13 @@ def sniff_file_format(filepath): ''' format_ = None log.info('Sniffing file format of: %s', filepath) - filepath_utf8 = filepath.encode('utf8') if isinstance(filepath, unicode) \ + filepath_utf8 = filepath.encode('utf8') if isinstance(filepath, six.string_types) \ else filepath mime_type = magic.from_file(filepath_utf8, mime=True) log.info('Magic detects file as: %s', mime_type) if mime_type: - if mime_type == 'application/xml': + # some operating systems magic mime xml as text/xml + if mime_type == 'application/xml' or mime_type == 'text/xml': with open(filepath) as f: buf = f.read(5000) format_ = get_xml_variant_including_xml_declaration(buf) @@ -120,7 +124,7 @@ def sniff_file_format(filepath): if has_rdfa(buf): format_ = {'format': 'RDFa'} - else: + if not format_: # Excel files sometimes not picked up by magic, so try alternative if is_excel(filepath): format_ = {'format': 'XLS'} @@ -139,14 +143,14 @@ def is_json(buf): JSON format.''' string = '"[^"]*"' string_re = re.compile(string) - number_re = re.compile('-?\d+(\.\d+)?([eE][+-]?\d+)?') - extra_values_re = re.compile('true|false|null') - object_start_re = re.compile('{%s:\s?' 
% string) - object_middle_re = re.compile('%s:\s?' % string) - object_end_re = re.compile('}') - comma_re = re.compile(',\s?') - array_start_re = re.compile('\[') - array_end_re = re.compile('\]') + number_re = re.compile(r'-?\d+(\.\d+)?([eE][+-]?\d+)?') + extra_values_re = re.compile(r'true|false|null') + object_start_re = re.compile(r'{%s:\s?' % string) + object_middle_re = re.compile(r'%s:\s?' % string) + object_end_re = re.compile(r'}') + comma_re = re.compile(r',\s?') + array_start_re = re.compile(r'\[') + array_end_re = re.compile(r'\]') any_value_regexs = [string_re, number_re, object_start_re, array_start_re, extra_values_re] # simplified state machine - just looks at stack of object/array and @@ -256,7 +260,7 @@ def get_cells_per_row(num_cells, num_rows): def is_html(buf): '''If this buffer is HTML, return that format type, else None.''' - xml_re = '.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?]*>' + xml_re = r'.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?]*>' match = re.match(xml_re, buf, re.IGNORECASE) if match: log.info('HTML tag detected') @@ -266,7 +270,7 @@ def is_html(buf): def is_iati(buf): '''If this buffer is IATI format, return that format type, else None.''' - xml_re = '.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?]*>' + xml_re = r'.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?]*>' match = re.match(xml_re, buf, re.IGNORECASE) if match: log.info('IATI tag detected') @@ -277,13 +281,13 @@ def is_iati(buf): def is_xml_but_without_declaration(buf): '''Decides if this is a buffer of XML, but missing the usual tag.''' - xml_re = '.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?<([^>\s]*)([^>]*)>' + xml_re = r'.{0,3}\s*(<\?xml[^>]*>\s*)?(]*>\s*)?<([^>\s]*)([^>]*)>' match = re.match(xml_re, buf, re.IGNORECASE) if match: top_level_tag_name, top_level_tag_attributes = match.groups()[-2:] - if 'xmlns:' not in top_level_tag_attributes and \ - (len(top_level_tag_name) > 20 or - len(top_level_tag_attributes) > 200): + if ('xmlns:' not in top_level_tag_attributes + and (len(top_level_tag_name) > 20 + or 
len(top_level_tag_attributes) > 200)): log.debug('Not XML (without declaration) - unlikely length first tag: <%s %s>', top_level_tag_name, top_level_tag_attributes) return False @@ -318,9 +322,9 @@ def start_element(name, attrs): p.StartElementHandler = start_element try: p.Parse(buf) - except GotFirstTag, e: - top_level_tag_name = str(e).lower() - except xml.sax.SAXException, e: + except GotFirstTag as e: + top_level_tag_name = six.text_type(e).lower() + except xml.sax.SAXException as e: log.info('Sax parse error: %s %s', e, buf) return {'format': 'XML'} @@ -354,8 +358,8 @@ def has_rdfa(buf): return False # more rigorous check for them as tag attributes - about_re = '<[^>]+\sabout="[^"]+"[^>]*>' - property_re = '<[^>]+\sproperty="[^"]+"[^>]*>' + about_re = r'<[^>]+\sabout="[^"]+"[^>]*>' + property_re = r'<[^>]+\sproperty="[^"]+"[^>]*>' # remove CR to catch tags spanning more than one line # buf = re.sub('\r\n', ' ', buf) if not re.search(about_re, buf): @@ -381,11 +385,11 @@ def get_zipped_format(filepath): filepaths = zip.namelist() finally: zip.close() - except zipfile.BadZipfile, e: + except zipfile.BadZipfile as e: log.info('Zip file open raised error %s: %s', e, e.args) return - except Exception, e: + except Exception as e: log.warning('Zip file open raised exception %s: %s', e, e.args) return @@ -438,7 +442,7 @@ def get_zipped_format(filepath): def is_excel(filepath): try: xlrd.open_workbook(filepath) - except Exception, e: + except Exception as e: log.info('Not Excel - failed to load: %s %s', e, e.args) return False else: @@ -534,12 +538,12 @@ def turtle_regex(): ''' global turtle_regex_ if not turtle_regex_: - rdf_term = '(<[^ >]+>|_:\S+|".+?"(@\w+)?(\^\^\S+)?|\'.+?\'(@\w+)?(\^\^\S+)?|""".+?"""(@\w+)' \ - '?(\^\^\S+)?|\'\'\'.+?\'\'\'(@\w+)?(\^\^\S+)?|[+-]?([0-9]+|[0-9]*\.[0-9]+)(E[+-]?[0-9]+)?|false|true)' + rdf_term = r'(<[^ >]+>|_:\S+|".+?"(@\w+)?(\^\^\S+)?|\'.+?\'(@\w+)?(\^\^\S+)?|""".+?"""(@\w+)' \ + 
r'?(\^\^\S+)?|\'\'\'.+?\'\'\'(@\w+)?(\^\^\S+)?|[+-]?([0-9]+|[0-9]*\.[0-9]+)(E[+-]?[0-9]+)?|false|true)' # simple case is: triple_re = '^T T T \.$'.replace('T', rdf_term) # but extend to deal with multiple predicate-objects: # triple = '^T T T\s*(;\s*T T\s*)*\.\s*$'.replace('T', rdf_term).replace(' ', '\s+') - triple = '(^T|;)\s*T T\s*(;|\.\s*$)'.replace('T', rdf_term).replace(' ', '\s+') + triple = r'(^T|;)\s*T T\s*(;|\.\s*$)'.replace('T', rdf_term).replace(' ', r'\s+') turtle_regex_ = re.compile(triple, re.MULTILINE) return turtle_regex_ diff --git a/ckanext/qa/tasks.py b/ckanext/qa/tasks.py index 26fe8289..c33d612e 100644 --- a/ckanext/qa/tasks.py +++ b/ckanext/qa/tasks.py @@ -4,15 +4,25 @@ ''' import datetime import json +import math import os +import six +import tempfile +import time import traceback + import urlparse import routes -from ckan.common import _ +import requests +from ckan.common import _ from ckan.lib import i18n from ckan.plugins import toolkit +try: + from ckan.plugins.toolkit import config +except ImportError: + from pylons import config import ckan.lib.helpers as ckan_helpers from sniff_format import sniff_file_format import lib @@ -22,6 +32,11 @@ log = logging.getLogger(__name__) +SSL_VERIFY = True +MAX_CONTENT_LENGTH = int(config.get('ckanext.qa.max_content_length', 1e7)) +CHUNK_SIZE = 16 * 1024 # 16kb +DOWNLOAD_TIMEOUT = 30 + if toolkit.check_ckan_version(max_version='2.6.99'): from ckan.lib import celery_app @@ -115,9 +130,9 @@ def update_package(ckan_ini_filepath, package_id): try: update_package_(package_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_package: %s: %s', - e.__class__.__name__, unicode(e)) + e.__class__.__name__, e) raise @@ -154,9 +169,9 @@ def update(ckan_ini_filepath, resource_id): load_config(ckan_ini_filepath) try: update_resource_(resource_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_resource: %s: %s', - 
e.__class__.__name__, unicode(e)) + e.__class__.__name__, e) raise @@ -253,10 +268,10 @@ def resource_score(resource): format_ = get_qa_format(resource.id) score_reason = ' '.join(score_reasons) format_ = format_ or None - except Exception, e: + except Exception as e: log.error('Unexpected error while calculating openness score %s: %s\nException: %s', - e.__class__.__name__, unicode(e), traceback.format_exc()) - score_reason = _("Unknown error: %s") % str(e) + e.__class__.__name__, e, traceback.format_exc()) + score_reason = _("Unknown error: %s") % e raise # Even if we can get the link, we should still treat the resource @@ -296,7 +311,7 @@ def format_date(date): else: return '' messages = [_('File could not be downloaded.'), - _('Reason') + ':', unicode(archival.status) + '.', + _('Reason') + ':', six.text_type(archival.status) + '.', _('Error details: %s.') % archival.reason, _('Attempted on %s.') % format_date(archival.updated)] last_success = format_date(archival.last_success) @@ -352,35 +367,142 @@ def score_by_sniffing_data(archival, resource, score_reasons): return (None, None) # Analyse the cached file filepath = archival.cache_filepath + delete_file = False if not os.path.exists(filepath): - score_reasons.append(_('Cache filepath does not exist: "%s".') % filepath) - return (None, None) - else: - if filepath: + log.debug("%s not found on disk, retrieving from URL %s", + filepath, archival.cache_url) + try: + filepath = _download_url(archival.cache_url).name + delete_file = True + except Exception as e: + score_reasons.append(_('A system error occurred during downloading this file') + '. 
%s' % e) + return (None, None) + + if filepath: + try: sniffed_format = sniff_file_format(filepath) - score = lib.resource_format_scores().get(sniffed_format['format']) \ - if sniffed_format else None - if sniffed_format: - score_reasons.append(_('Content of file appeared to be format "%s" which receives openness score: %s.') - % (sniffed_format['format'], score)) - return score, sniffed_format['format'] - else: - score_reasons.append(_('The format of the file was not recognized from its contents.')) - return (None, None) + finally: + if delete_file: + try: + os.remove(filepath) + except OSError as e: + log.warn("Unable to remove temporary file %s: %s", filepath, e) + score = lib.resource_format_scores().get(sniffed_format['format']) \ + if sniffed_format else None + if sniffed_format: + score_reasons.append(_('Content of file appeared to be format "%s" which receives openness score: %s.') + % (sniffed_format['format'], score)) + return score, sniffed_format['format'] else: - # No cache_url - if archival.status_id == Status.by_text('Chose not to download'): - score_reasons.append(_('File was not downloaded deliberately') + '. ' - + _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.')) - return (None, None) - elif archival.is_broken is None and archival.status_id: - # i.e. 'Download failure' or 'System error during archival' - score_reasons.append(_('A system error occurred during downloading this file') + '. ' - + _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.')) - return (None, None) - else: - score_reasons.append(_('This file had not been downloaded at the time of scoring it.')) - return (None, None) + score_reasons.append(_('The format of the file was not recognized from its contents.')) + return (None, None) + else: + # No cache_url + if archival.status_id == Status.by_text('Chose not to download'): + score_reasons.append(_('File was not downloaded deliberately') + '. 
' + + _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.')) + return (None, None) + elif archival.is_broken is None and archival.status_id: + # i.e. 'Download failure' or 'System error during archival' + score_reasons.append(_('A system error occurred during downloading this file') + '. ' + + _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.')) + return (None, None) + else: + score_reasons.append(_('This file had not been downloaded at the time of scoring it.')) + return (None, None) + + +def _download_url(url): + # check scheme + scheme = urlparse.urlsplit(url).scheme + if scheme not in ('http', 'https', 'ftp'): + raise IOError( + 'Only http, https, and ftp resources may be fetched.' + ) + + # fetch the resource data + log.info('Fetching from: {0}'.format(url)) + tmp_file = get_tmp_file(url) + length = 0 + try: + headers = {} + response = get_response(url, headers) + + # download the file to a tempfile on disk + for chunk in response.iter_content(CHUNK_SIZE): + length += len(chunk) + if length > MAX_CONTENT_LENGTH: + log.warn("File size exceeds length limit %s, truncating", MAX_CONTENT_LENGTH) + break + tmp_file.write(chunk) + + except requests.exceptions.HTTPError as error: + # status code error + log.debug('HTTP error: {}'.format(error)) + tmp_file.close() + os.remove(tmp_file.name) + raise requests.exceptions.HTTPError( + error.response.status_code, + "Received a bad HTTP response when trying to download the data file", + url) + except requests.exceptions.Timeout: + log.warning('URL time out after {0}s'.format(DOWNLOAD_TIMEOUT)) + tmp_file.close() + os.remove(tmp_file.name) + raise IOError('Connection timed out after {}s'.format( + DOWNLOAD_TIMEOUT)) + except requests.exceptions.RequestException as e: + try: + err_message = str(e.reason) + except AttributeError: + err_message = str(e) + log.warning('URL error: {}'.format(err_message)) + tmp_file.close() + 
os.remove(tmp_file.name) + raise requests.exceptions.HTTPError(None, err_message, url) + + log.info('Downloaded ok - %s', printable_file_size(length)) + tmp_file.seek(0) + return tmp_file + + +def get_response(url, headers): + def get_url(): + kwargs = {'headers': headers, 'timeout': DOWNLOAD_TIMEOUT, + 'verify': SSL_VERIFY, 'stream': True} # just gets the headers for now + if 'ckan.download_proxy' in config: + proxy = config.get('ckan.download_proxy') + kwargs['proxies'] = {'http': proxy, 'https': proxy} + return requests.get(url, **kwargs) + response = get_url() + if response.status_code == 202: + # Seen: https://data-cdfw.opendata.arcgis.com/datasets + # In this case it means it's still processing, so do retries. + # 202 can mean other things, but there's no harm in retries. + wait = 1 + while wait < 120 and response.status_code == 202: + # log.info('Retrying after {}s'.format(wait)) + time.sleep(wait) + response = get_url() + wait *= 3 + response.raise_for_status() + return response + + +def get_tmp_file(url): + filename = url.split('/')[-1].split('#')[0].split('?')[0] + tmp_file = tempfile.NamedTemporaryFile(suffix=filename, delete=False) + return tmp_file + + +def printable_file_size(size_bytes): + if size_bytes == 0: + return '0 bytes' + size_name = ('bytes', 'KB', 'MB', 'GB', 'TB') + i = int(math.floor(math.log(size_bytes, 1024))) + p = math.pow(1024, i) + s = round(size_bytes / p, 1) + return "%s %s" % (s, size_name[i]) def score_by_url_extension(resource, score_reasons): @@ -482,7 +604,7 @@ def save_qa_result(resource, qa_result): import ckan.model as model from ckanext.qa.model import QA - now = datetime.datetime.now() + now = datetime.datetime.utcnow() qa = QA.get_for_resource(resource.id) if not qa: diff --git a/ckanext/qa/tests/fake_ckan.py b/ckanext/qa/tests/fake_ckan.py index 30b85601..c8434cbc 100644 --- a/ckanext/qa/tests/fake_ckan.py +++ b/ckanext/qa/tests/fake_ckan.py @@ -10,12 +10,12 @@ 'last_success': '2008-10-01', 'first_failure': '', 
'failure_count': 0, - }), + }), 'stack': '', 'last_updated': '2008-10-10T19:30:37.536836', } } - ) +) request_store = [] task_status = {'archiver': TASK_STATUS_ARCHIVER_OK, diff --git a/ckanext/qa/tests/mock_remote_server.py b/ckanext/qa/tests/mock_remote_server.py index b43fb77d..3761a90d 100644 --- a/ckanext/qa/tests/mock_remote_server.py +++ b/ckanext/qa/tests/mock_remote_server.py @@ -7,6 +7,7 @@ from time import sleep from wsgiref.simple_server import make_server import urllib2 +import six import socket @@ -37,7 +38,7 @@ def serve(self, host='localhost', port_range=(8000, 9000)): This uses context manager to make sure the server is stopped:: >>> with MockTestServer().serve() as addr: - ... print urllib2.urlopen('%s/?content=hello+world').read() + ... print(urllib2.urlopen('%s/?content=hello+world').read()) ... 'hello world' """ @@ -80,8 +81,8 @@ def get_content(cls, varspec): called and its return value used. """ modpath, var = varspec.split(':') - mod = reduce(getattr, modpath.split('.')[1:], __import__(modpath)) - var = reduce(getattr, var.split('.'), mod) + mod = six.moves.reduce(getattr, modpath.split('.')[1:], __import__(modpath)) + var = six.moves.reduce(getattr, var.split('.'), mod) try: return var() except TypeError: @@ -96,7 +97,8 @@ class MockEchoTestServer(MockHTTPServer): a 500 error response: 'http://localhost/?status=500' a 200 OK response, returning the function's docstring: - 'http://localhost/?status=200;content-type=text/plain;content_var=ckan.tests.lib.test_package_search:test_wsgi_app.__doc__' + 'http://localhost/?status=200;content-type=text/plain;content_var + =ckan.tests.lib.test_package_search:test_wsgi_app.__doc__' To specify content, use: @@ -113,10 +115,16 @@ def __call__(self, environ, start_response): if 'content_var' in request.str_params: content = request.str_params.get('content_var') content = self.get_content(content) + elif 'content_long' in request.str_params: + content = '*' * 1000001 else: content = 
request.str_params.get('content', '') + if 'method' in request.str_params \ + and request.method.lower() != request.str_params['method'].lower(): + content = '' + status = 405 - if isinstance(content, unicode): + if isinstance(content, six.text_type): raise TypeError("Expected raw byte string for content") headers = [ @@ -124,8 +132,11 @@ def __call__(self, environ, start_response): for item in request.str_params.items() if item[0] not in ('content', 'status') ] - if content: - headers += [('Content-Length', str(len(content)))] + if 'length' in request.str_params: + cl = request.str_params.get('length') + headers += [('Content-Length', cl)] + elif content and 'no-content-length' not in request.str_params: + headers += [('Content-Length', six.binary_type(len(content)))] start_response( '%d %s' % (status, responses[status]), headers diff --git a/ckanext/qa/tests/test_link_checker.py b/ckanext/qa/tests/test_link_checker.py index 550a016e..cd8a79a6 100644 --- a/ckanext/qa/tests/test_link_checker.py +++ b/ckanext/qa/tests/test_link_checker.py @@ -121,12 +121,12 @@ def test_colon_in_query_string(self, url): # accept, because browsers accept this # see discussion: http://trac.ckan.org/ticket/318 result = self.check_link(url) - print result + print(result) assert_equal(result['url_errors'], []) @with_mock_url('?status=200 ') def test_trailing_whitespace(self, url): # accept, because browsers accept this result = self.check_link(url) - print result + print(result) assert_equal(result['url_errors'], []) diff --git a/ckanext/qa/tests/test_sniff_format.py b/ckanext/qa/tests/test_sniff_format.py index f7b86577..86039bc8 100644 --- a/ckanext/qa/tests/test_sniff_format.py +++ b/ckanext/qa/tests/test_sniff_format.py @@ -30,7 +30,7 @@ def assert_file_has_format_sniffed_correctly(cls, format_extension, filepath): '''Given a filepath, checks the sniffed format matches the format_extension.''' expected_format = format_extension sniffed_format = sniff_file_format(filepath) - assert 
sniffed_format, expected_format + assert sniffed_format, "Expected {} but failed to sniff any format for file: {}".format(expected_format, filepath) expected_format_without_zip = expected_format.replace('.zip', '') assert_equal(sniffed_format['format'].lower(), expected_format_without_zip) @@ -49,16 +49,11 @@ def assert_file_has_format_sniffed_correctly(cls, format_extension, filepath): def check_format(cls, format, filename=None): for format_extension, filepath in cls.fixture_files: if format_extension == format: - if filename: - if filename in filepath: - break - else: - continue - else: + if not filename or filename in filepath: + cls.assert_file_has_format_sniffed_correctly(format_extension, filepath) break else: assert 0, format # Could not find fixture for format - cls.assert_file_has_format_sniffed_correctly(format_extension, filepath) def test_xls(self): self.check_format('xls', '10-p108-data-results') @@ -292,5 +287,5 @@ def test_turtle_regex(): def test_is_ttl__num_triples(): triple = ' ; .' 
- assert not is_ttl('\n'.join([triple]*2)) - assert is_ttl('\n'.join([triple]*5)) + assert not is_ttl('\n'.join([triple] * 2)) + assert is_ttl('\n'.join([triple] * 5)) diff --git a/ckanext/qa/tests/test_tasks.py b/ckanext/qa/tests/test_tasks.py index 4a9bf3bd..9f59f207 100644 --- a/ckanext/qa/tests/test_tasks.py +++ b/ckanext/qa/tests/test_tasks.py @@ -78,7 +78,7 @@ def test_trigger_on_archival(cls): context = {'model': model, 'ignore_auth': True, 'session': model.Session, 'user': 'test'} pkg = {'name': 'testpkg', 'license_id': 'uk-ogl', 'resources': [ {'url': 'http://test.com/', 'format': 'CSV', 'description': 'Test'} - ]} + ]} pkg = get_action('package_create')(context, pkg) resource_dict = pkg['resources'][0] res_id = resource_dict['id'] @@ -304,7 +304,7 @@ def get_qa_result(cls, **kwargs): 'openness_score_reason': 'Detected as CSV which scores 3', 'format': 'CSV', 'archival_timestamp': datetime.datetime(2015, 12, 16), - } + } qa_result.update(kwargs) return qa_result @@ -335,7 +335,7 @@ def test_simple(self): 'url': 'http://example.com/file.csv', 'title': 'Some data', 'format': '', - } + } dataset = ckan_factories.Dataset(resources=[resource]) resource = model.Resource.get(dataset['resources'][0]['id']) @@ -359,7 +359,7 @@ def test_simple(self): 'url': 'http://example.com/file.csv', 'title': 'Some data', 'format': '', - } + } dataset = ckan_factories.Dataset(resources=[resource]) resource = model.Resource.get(dataset['resources'][0]['id']) diff --git a/requirements.txt b/requirements.txt index 70da4e40..0c9c70c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,10 @@ -xlrd==1.0.0 -python-magic==0.4.12 +xlrd==1.1.0 +#python-magic==0.4.15 #in ckancore messytables==0.15.2 progressbar==2.3 +#SQLAlchemy>=0.6.6 #in ckancore +#requests==2.11.1 #in ckancore +six>=1.0.0 #in ckancore + +ckanext-archiver +ckanext-report diff --git a/setup.py b/setup.py index f6012055..779aed3d 100644 --- a/setup.py +++ b/setup.py @@ -17,20 +17,16 @@ 
include_package_data=True, zip_safe=False, install_requires=[ - 'ckanext-archiver>=2.0', - 'ckanext-report', - 'SQLAlchemy>=0.6.6', - 'requests', - 'xlrd>=0.8.0', - 'messytables>=0.8', - 'python-magic>=0.4', - 'progressbar', - 'six>=1.9' # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 + # CKAN extensions should not list dependencies here, but in a separate + # ``requirements.txt`` file. + # + # http://docs.ckan.org/en/latest/extensions/best-practices.html#add-third-party-libraries-to-requirements-txt ], tests_require=[ - 'nose', - 'mock', - 'flask' + # CKAN extensions should not list dependencies here, but in a separate + # ``dev-requirements.txt`` file. + # + # http://docs.ckan.org/en/latest/extensions/best-practices.html#add-third-party-libraries-to-requirements-txt ], entry_points=''' [paste.paster_command]