diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..5d0fffe4 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,69 @@ +name: Tests +on: [push, pull_request] +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v2 + with: + python-version: '3.6' + - name: Install requirements + run: pip install flake8 pycodestyle + - name: Check syntax + run: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan + - name: Run flake8 + run: flake8 . --count --max-line-length=127 --statistics --exclude ckan + + test: + needs: lint + strategy: + matrix: + ckan-version: [2.9, 2.9-py2, 2.8, 2.7] + fail-fast: false + + name: CKAN ${{ matrix.ckan-version }} + runs-on: ubuntu-latest + container: + image: openknowledge/ckan-dev:${{ matrix.ckan-version }} + services: + solr: + image: ckan/ckan-solr:${{ matrix.ckan-version }} + postgres: + image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + redis: + image: redis:3 + env: + CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test + CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test + CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test + CKAN_SOLR_URL: http://solr:8983/solr/ckan + CKAN_REDIS_URL: redis://redis:6379/1 + + steps: + - uses: actions/checkout@v3 + - name: Install requirements + run: | + apk add file + pip install -r requirements.txt + pip install -r dev-requirements.txt + pip install -e . + # Replace default path to CKAN core config file with the one on the container + sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini + - name: Setup extension (CKAN >= 2.9) + if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }} + run: | + ckan -c test.ini db init + ckan -c test.ini qa init + - name: Setup extension (CKAN < 2.9) + if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }} + run: | + paster --plugin=ckan db init -c test.ini + paster --plugin=ckanext-qa qa init -c test.ini + - name: Run tests + run: pytest --ckan-ini=test.ini --cov=ckanext.qa --disable-warnings ckanext/qa/tests \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 94b0015c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,43 +0,0 @@ -language: python -python: - - "2.7" -env: - - CKANVERSION=master - - CKANVERSION=2.3 - - CKANVERSION=2.4 - - CKANVERSION=2.5 - - CKANVERSION=2.6 - - CKANVERSION=2.7 - - CKANVERSION=2.8 -services: - - redis-server - - postgresql -install: - - bash bin/travis-build.bash - - pip install coveralls -script: sh bin/travis-run.sh -after_success: - - coveralls - -# the new trusty images of Travis cause build errors with psycopg2, see https://github.com/travis-ci/travis-ci/issues/8897 -dist: trusty -group: deprecated-2017Q4 - -stages: - - Flake8 - - test - -jobs: - include: - - stage: Flake8 - env: FLAKE8=True - install: - - bash bin/travis-build.bash - - pip install flake8==3.5.0 - - pip install pycodestyle==2.3.0 - script: - - flake8 --version - # stop the build if there are Python syntax errors or undefined names - - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan,ckanext-archiver - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude ckan,ckanext-archiver diff --git a/README.rst b/README.rst index fa2fcfc8..15e90a01 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,8 @@ these badges work. The necessary Travis and Coverage config files have been generated for you. -.. image:: https://travis-ci.org/ckan/ckanext-qa.svg?branch=master - :target: https://travis-ci.org/ckan/ckanext-qa +.. image:: https://github.com/ckan/ckanext-qa/actions/workflows/test.yml/badge.svg + :target: https://github.com/ckan/ckanext-qa/actions/workflows/test.yml CKAN QA Extension (Quality Assurance) ===================================== @@ -31,7 +31,7 @@ Requirements Before installing ckanext-qa, make sure that you have installed the following: -* CKAN 2.1+ +* CKAN 2.1+ (tests are only running for CKAN 2.7+) * ckanext-archiver 2.0+ (https://github.com/ckan/ckanext-archiver) * ckanext-report (https://github.com/datagovuk/ckanext-report) for reporting diff --git a/bin/travis-build.bash b/bin/travis-build.bash deleted file mode 100644 index 4c8892a8..00000000 --- a/bin/travis-build.bash +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -set -e -set -x # echo on - -echo "This is travis-build.bash..." - -echo "Installing the packages that CKAN requires..." -sudo apt-get update -qq -sudo apt-get install solr-jetty libcommons-fileupload-java - -echo "Upgrading libmagic for ckanext-qa..." -# appears to upgrade it from 5.09-2 to 5.09-2ubuntu0.6 which seems to help the tests -sudo apt-get install libmagic1 - -echo "Installing CKAN and its Python dependencies..." -git clone https://github.com/ckan/ckan -cd ckan - -if [ $CKANVERSION == 'master' ] -then - echo "CKAN version: master" -else - CKAN_TAG=$(git tag | grep ^ckan-$CKANVERSION | sort --version-sort | tail -n 1) - git checkout $CKAN_TAG - echo "CKAN version: ${CKAN_TAG#ckan-}" -fi - -python setup.py develop -pip install -r requirements.txt --allow-all-external -pip install -r dev-requirements.txt --allow-all-external -cd - - -echo "Setting up Solr..." -# solr is multicore for tests on ckan master now, but it's easier to run tests -# on Travis single-core still. -# see https://github.com/ckan/ckan/issues/2972 -sed -i -e 's/solr_url.*/solr_url = http:\/\/127.0.0.1:8983\/solr/' ckan/test-core.ini -printf "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty -sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml -sudo service jetty restart - -echo "Creating the PostgreSQL user and database..." -sudo -u postgres psql -c "CREATE USER ckan_default WITH PASSWORD 'pass';" -sudo -u postgres psql -c 'CREATE DATABASE ckan_test WITH OWNER ckan_default;' - -echo "Initialising the database..." -cd ckan -paster db init -c test-core.ini -cd - - -echo "Installing dependency ckanext-report and its requirements..." -pip install -e git+https://github.com/datagovuk/ckanext-report.git#egg=ckanext-report - -echo "Installing dependency ckanext-archiver and its requirements..." -git clone https://github.com/ckan/ckanext-archiver.git -cd ckanext-archiver -pip install -e . -pip install -r requirements.txt -cd - - -echo "Installing ckanext-qa and its requirements..." -python setup.py develop -pip install -r requirements.txt -pip install -r dev-requirements.txt - -echo "Moving test-core.ini into a subdir..." -mkdir subdir -mv test-core.ini subdir - -echo "travis-build.bash is done." diff --git a/bin/travis-run.sh b/bin/travis-run.sh deleted file mode 100644 index 5c4022b7..00000000 --- a/bin/travis-run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -e - -echo "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty -sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml -sudo service jetty restart -nosetests --with-pylons=subdir/test-core.ini --with-coverage --cover-package=ckanext.archiver --cover-inclusive --cover-erase --cover-tests diff --git a/ckanext/qa/bin/common.py b/ckanext/qa/bin/common.py index 0ace784e..43d408c4 100644 --- a/ckanext/qa/bin/common.py +++ b/ckanext/qa/bin/common.py @@ -1,29 +1,3 @@ -import os - - -def load_config(config_filepath): - import paste.deploy - config_abs_path = os.path.abspath(config_filepath) - conf = paste.deploy.appconfig('config:' + config_abs_path) - import ckan - ckan.config.environment.load_environment(conf.global_conf, - conf.local_conf) - - -def register_translator(): - # Register a translator in this thread so that - # the _() functions in logic layer can work - from paste.registry import Registry - from pylons import translator - from ckan.lib.cli import MockTranslator - global registry - registry = Registry() - registry.prepare() - global translator_obj - translator_obj = MockTranslator() - registry.register(translator, translator_obj) - - def get_resources(state='active', publisher_ref=None, resource_id=None, dataset_name=None): ''' Returns all active resources, or filtered by the given criteria. ''' from ckan import model @@ -48,5 +22,5 @@ def get_resources(state='active', publisher_ref=None, resource_id=None, dataset_ resources = resources.filter(model.Resource.id == resource_id) criteria.append('Resource:%s' % resource_id) resources = resources.all() - print '%i resources (%s)' % (len(resources), ' '.join(criteria)) + print('%i resources (%s)' % (len(resources), ' '.join(criteria))) return resources diff --git a/ckanext/qa/bin/migrate_task_status.py b/ckanext/qa/bin/migrate_task_status.py index f57b1bf5..a60723e5 100644 --- a/ckanext/qa/bin/migrate_task_status.py +++ b/ckanext/qa/bin/migrate_task_status.py @@ -59,7 +59,7 @@ def migrate(options): # time, so some timezone nonesense going on. Can't do much. archival = Archival.get_for_resource(res.id) if not archival: - print add_stat('QA but no Archival data', res, stats) + print(add_stat('QA but no Archival data', res, stats)) continue archival_date = archival.updated # the state of the resource was as it was archived on the date of @@ -112,10 +112,10 @@ def migrate(options): model.Session.add(qa) add_stat('Added to QA table', res, stats) - print 'Summary\n', stats.report() + print('Summary\n', stats.report()) if options.write: model.repo.commit_and_remove() - print 'Written' + print('Written') def add_stat(outcome, res, stats, extra_info=None): @@ -154,10 +154,7 @@ def date_str_to_datetime_or_none(date_str): if len(args) != 1: parser.error('Wrong number of arguments (%i)' % len(args)) config_ini = args[0] - print 'Loading CKAN config...' - common.load_config(config_ini) - common.register_translator() - print 'Done' + # Setup logging to print debug out for local only rootLogger = logging.getLogger() rootLogger.setLevel(logging.WARNING) diff --git a/ckanext/qa/bin/running_stats.py b/ckanext/qa/bin/running_stats.py index 947797aa..11051ee5 100644 --- a/ckanext/qa/bin/running_stats.py +++ b/ckanext/qa/bin/running_stats.py @@ -110,6 +110,6 @@ def report_value(self, category): package_stats.add('Success', 'good3') package_stats.add('Success', 'good4') package_stats.add('Failure', 'bad1') - print package_stats.report() + print(package_stats.report()) - print StatsList().report() + print(StatsList().report()) diff --git a/ckanext/qa/cli.py b/ckanext/qa/cli.py new file mode 100644 index 00000000..8e2dd2f0 --- /dev/null +++ b/ckanext/qa/cli.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +import sys +import click +import ckanext.qa.utils as utils + + +def get_commands(): + return [qa] + + +@click.group() +def qa(): + """ + QA analysis of CKAN resources + + Usage:: + + ckan -c qa init + - Creates the database tables that QA expects for storing + results + + ckan -c qa [options] update [dataset/group name/id] + - QA analysis on all resources in a given dataset, or on all + datasets if no dataset given + + ckan -c qa sniff {filepath} + - Opens the file and determines its type by the contents + + ckan -c qa view [dataset name/id] + - See package score information + + ckan -c qa clean + - Remove all package score information + + ckan -c qa migrate1 + - Migrates the way results are stored in task_status, + with commit 6f63ab9e 20th March 2013 + (from key='openness_score'/'openness_score_failure_count' to + key='status') + + The commands should be run from the ckanext-qa directory and expect + a development.ini file to be present. Most of the time you will + specify the config explicitly though:: + + ckan -c qa update + """ + + +@qa.command() +def init(): + utils.init_db() + + +@qa.command() +@click.argument('ids', nargs=-1) +@click.option('-q', '--queue', help='Send to a particular queue') +def update(ids, queue): + utils.update(ids, queue) + + +@qa.command() +@click.argument('filepaths', nargs=-1) +def sniff(filepaths): + if len(filepaths) < 1: + print('Not enough arguments', filepaths) + sys.exit(1) + + utils.sniff(filepaths) + + +@qa.command() +@click.argument('package_ref') +def view(package_ref=None): + utils.view(package_ref) + + +@qa.command() +def clean(): + utils.clean() + + +@qa.command() +def migrate1(): + utils.migrate1() diff --git a/ckanext/qa/commands.py b/ckanext/qa/commands.py index 992fb0cd..f43f1a25 100644 --- a/ckanext/qa/commands.py +++ b/ckanext/qa/commands.py @@ -1,9 +1,7 @@ import logging import sys - -from sqlalchemy import or_ - import ckan.plugins as p +from ckanext.qa.utils import init_db, update, sniff, view, clean, migrate1 REQUESTS_HEADER = {'content-type': 'application/json', 'User-Agent': 'ckanext-qa commands'} @@ -65,7 +63,7 @@ def command(self): Parse command line arguments and call appropriate method. """ if not self.args or self.args[0] in ['--help', '-h', 'help']: - print QACommand.__doc__ + print(QACommand.__doc__) return cmd = self.args[0] @@ -94,186 +92,24 @@ def command(self): self.log.error('Command "%s" not recognized' % (cmd,)) def init_db(self): - import ckan.model as model - from ckanext.qa.model import init_tables - init_tables(model.meta.engine) + init_db() def update(self): - from ckan import model - from ckanext.qa import lib - packages = [] - resources = [] if len(self.args) > 1: - for arg in self.args[1:]: - # try arg as a group id/name - group = model.Group.get(arg) - if group and group.is_organization: - # group.packages() is unreliable for an organization - - # member objects are not definitive whereas owner_org, so - # get packages using owner_org - query = model.Session.query(model.Package)\ - .filter( - or_(model.Package.state == 'active', - model.Package.state == 'pending'))\ - .filter_by(owner_org=group.id) - packages.extend(query.all()) - if not self.options.queue: - self.options.queue = 'bulk' - continue - elif group: - packages.extend(group.packages()) - if not self.options.queue: - self.options.queue = 'bulk' - continue - # try arg as a package id/name - pkg = model.Package.get(arg) - if pkg: - packages.append(pkg) - if not self.options.queue: - self.options.queue = 'priority' - continue - # try arg as a resource id - res = model.Resource.get(arg) - if res: - resources.append(res) - if not self.options.queue: - self.options.queue = 'priority' - continue - else: - self.log.error('Could not recognize as a group, package ' - 'or resource: %r', arg) - sys.exit(1) - else: - # all packages - pkgs = model.Session.query(model.Package)\ - .filter_by(state='active')\ - .order_by('name').all() - packages.extend(pkgs) - if not self.options.queue: - self.options.queue = 'bulk' - - if packages: - self.log.info('Datasets to QA: %d', len(packages)) - if resources: - self.log.info('Resources to QA: %d', len(resources)) - if not (packages or resources): - self.log.error('No datasets or resources to process') - sys.exit(1) - - self.log.info('Queue: %s', self.options.queue) - for package in packages: - lib.create_qa_update_package_task(package, self.options.queue) - self.log.info('Queuing dataset %s (%s resources)', - package.name, len(package.resources)) - - for resource in resources: - package = resource.resource_group.package - self.log.info('Queuing resource %s/%s', package.name, resource.id) - lib.create_qa_update_task(resource, self.options.queue) - - self.log.info('Completed queueing') + ids = self.args[1:] + update(ids, self.options.queue) def sniff(self): - from ckanext.qa.sniff_format import sniff_file_format - if len(self.args) < 2: - print 'Not enough arguments', self.args + print('Not enough arguments', self.args) sys.exit(1) - for filepath in self.args[1:]: - format_ = sniff_file_format( - filepath, logging.getLogger('ckanext.qa.sniffer')) - if format_: - print 'Detected as: %s - %s' % (format_['display_name'], - filepath) - else: - print 'ERROR: Could not recognise format of: %s' % filepath + sniff(self.args[1:]) def view(self, package_ref=None): - from ckan import model - - q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') - print 'QA records - %i TaskStatus rows' % q.count() - print ' across %i Resources' % q.distinct('entity_id').count() - - if package_ref: - pkg = model.Package.get(package_ref) - print 'Package %s %s' % (pkg.name, pkg.id) - for res in pkg.resources: - print 'Resource %s' % res.id - for row in q.filter_by(entity_id=res.id): - print '* %s = %r error=%r' % (row.key, row.value, - row.error) + view(package_ref) def clean(self): - from ckan import model - - print 'Before:' - self.view() - - q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') - q.delete() - model.Session.commit() - - print 'After:' - self.view() + clean() def migrate1(self): - from ckan import model - from ckan.lib.helpers import json - q_status = model.Session.query(model.TaskStatus) \ - .filter_by(task_type='qa') \ - .filter_by(key='status') - print '* %s with "status" will be deleted e.g. %s' % (q_status.count(), - q_status.first()) - q_failures = model.Session.query(model.TaskStatus) \ - .filter_by(task_type='qa') \ - .filter_by(key='openness_score_failure_count') - print '* %s with openness_score_failure_count to be deleted e.g.\n%s'\ - % (q_failures.count(), q_failures.first()) - q_score = model.Session.query(model.TaskStatus) \ - .filter_by(task_type='qa') \ - .filter_by(key='openness_score') - print '* %s with openness_score to migrate e.g.\n%s' % \ - (q_score.count(), q_score.first()) - q_reason = model.Session.query(model.TaskStatus) \ - .filter_by(task_type='qa') \ - .filter_by(key='openness_score_reason') - print '* %s with openness_score_reason to migrate e.g.\n%s' % \ - (q_reason.count(), q_reason.first()) - raw_input('Press Enter to continue') - - q_status.delete() - model.Session.commit() - print '..."status" deleted' - - q_failures.delete() - model.Session.commit() - print '..."openness_score_failure_count" deleted' - - for task_status in q_score: - reason_task_status = q_reason \ - .filter_by(entity_id=task_status.entity_id) \ - .first() - if reason_task_status: - reason = reason_task_status.value - reason_task_status.delete() - else: - reason = None - - task_status.key = 'status' - task_status.error = json.dumps({ - 'reason': reason, - 'format': None, - 'is_broken': None, - }) - model.Session.commit() - print '..."openness_score" and "openness_score_reason" migrated' - - count = q_reason.count() - q_reason.delete() - model.Session.commit() - print '... %i remaining "openness_score_reason" deleted' % count - - model.Session.flush() - model.Session.remove() - print 'Migration succeeded' + migrate1() diff --git a/ckanext/qa/controllers.py b/ckanext/qa/controllers.py index 493eed7f..4cedcbb5 100644 --- a/ckanext/qa/controllers.py +++ b/ckanext/qa/controllers.py @@ -102,7 +102,7 @@ def _check_link(self, url): result['mimetype'] = self._extract_mimetype(headers) result['size'] = headers.get('content-length', '') result['last_modified'] = self._parse_and_format_date(headers.get('last-modified', '')) - except LinkCheckerError, e: + except LinkCheckerError as e: result['url_errors'].append(str(e)) return result diff --git a/ckanext/qa/lib.py b/ckanext/qa/lib.py index 2113badd..d1826884 100644 --- a/ckanext/qa/lib.py +++ b/ckanext/qa/lib.py @@ -2,11 +2,10 @@ import json import re import logging - -from pylons import config +from ckan.plugins.toolkit import config from ckan import plugins as p -import tasks +from ckanext.qa.tasks import update_package, update log = logging.getLogger(__name__) @@ -46,16 +45,15 @@ def resource_format_scores(): if not _RESOURCE_FORMAT_SCORES: _RESOURCE_FORMAT_SCORES = {} json_filepath = config.get('qa.resource_format_openness_scores_json') - import ckanext.qa.plugin if not json_filepath: json_filepath = os.path.join( - os.path.dirname(os.path.realpath(ckanext.qa.plugin.__file__)), + os.path.dirname(os.path.realpath(__file__)), 'resource_format_openness_scores.json' ) with open(json_filepath) as format_file: try: file_resource_formats = json.loads(format_file.read()) - except ValueError, e: + except ValueError as e: # includes simplejson.decoder.JSONDecodeError raise ValueError('Invalid JSON syntax in %s: %s' % (json_filepath, e)) @@ -87,23 +85,19 @@ def munge_format_to_be_canonical(format_name): def create_qa_update_package_task(package, queue): - from pylons import config - ckan_ini_filepath = os.path.abspath(config.__file__) - compat_enqueue('qa.update_package', tasks.update_package, queue, args=[ckan_ini_filepath, package.id]) + compat_enqueue('qa.update_package', update_package, queue, args=[package.id]) log.debug('QA of package put into celery queue %s: %s', queue, package.name) def create_qa_update_task(resource, queue): - from pylons import config if p.toolkit.check_ckan_version(max_version='2.2.99'): package = resource.resource_group.package else: package = resource.package - ckan_ini_filepath = os.path.abspath(config.__file__) - compat_enqueue('qa.update', tasks.update, queue, args=[ckan_ini_filepath, resource.id]) + compat_enqueue('qa.update', update, queue, args=[resource.id]) log.debug('QA of resource put into celery queue %s: %s/%s url=%r', queue, package.name, resource.id, resource.url) diff --git a/ckanext/qa/model.py b/ckanext/qa/model.py index bf81073d..0a515ce2 100644 --- a/ckanext/qa/model.py +++ b/ckanext/qa/model.py @@ -1,3 +1,4 @@ +import sys import uuid import datetime import six @@ -15,6 +16,10 @@ Base = declarative_base() +if sys.version_info[0] >= 3: + unicode = str + + def make_uuid(): return six.text_type(uuid.uuid4()) diff --git a/ckanext/qa/plugin.py b/ckanext/qa/plugin/__init__.py similarity index 77% rename from ckanext/qa/plugin.py rename to ckanext/qa/plugin/__init__.py index 876459d1..9023cc37 100644 --- a/ckanext/qa/plugin.py +++ b/ckanext/qa/plugin/__init__.py @@ -2,21 +2,27 @@ import ckan.model as model import ckan.plugins as p +from ckan.plugins import toolkit from ckanext.archiver.interfaces import IPipe -from logic import action, auth -from model import QA, aggregate_qa_for_a_dataset -import helpers -import lib +from ckanext.qa.logic import action, auth +from ckanext.qa.model import QA, aggregate_qa_for_a_dataset +from ckanext.qa.helpers import qa_openness_stars_resource_html, qa_openness_stars_dataset_html +from ckanext.qa.lib import create_qa_update_package_task from ckanext.report.interfaces import IReport log = logging.getLogger(__name__) -class QAPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm): +if toolkit.check_ckan_version(min_version='2.9.0'): + from ckanext.qa.plugin.flask_plugin import MixinPlugin +else: + from ckanext.qa.plugin.pylons_plugin import MixinPlugin + + +class QAPlugin(MixinPlugin, p.SingletonPlugin, toolkit.DefaultDatasetForm): p.implements(p.IConfigurer, inherit=True) - p.implements(p.IRoutes, inherit=True) p.implements(IPipe, inherit=True) p.implements(IReport) p.implements(p.IActions) @@ -27,18 +33,7 @@ class QAPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm): # IConfigurer def update_config(self, config): - p.toolkit.add_template_directory(config, 'templates') - - # IRoutes - - def before_map(self, map): - # Link checker - deprecated - res = 'ckanext.qa.controllers:LinkCheckerController' - map.connect('qa_resource_checklink', '/qa/link_checker', - conditions=dict(method=['GET']), - controller=res, - action='check_link') - return map + toolkit.add_template_directory(config, 'templates') # IPipe @@ -52,7 +47,7 @@ def receive_data(self, operation, queue, **params): dataset = model.Package.get(dataset_id) assert dataset - lib.create_qa_update_package_task(dataset, queue=queue) + create_qa_update_package_task(dataset, queue=queue) # IReport @@ -82,9 +77,9 @@ def get_auth_functions(self): def get_helpers(self): return { 'qa_openness_stars_resource_html': - helpers.qa_openness_stars_resource_html, + qa_openness_stars_resource_html, 'qa_openness_stars_dataset_html': - helpers.qa_openness_stars_dataset_html, + qa_openness_stars_dataset_html, } # IPackageController diff --git a/ckanext/qa/plugin/flask_plugin.py b/ckanext/qa/plugin/flask_plugin.py new file mode 100644 index 00000000..a6c3ba87 --- /dev/null +++ b/ckanext/qa/plugin/flask_plugin.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +import ckan.plugins as p +import ckanext.qa.views as views +import ckanext.qa.cli as cli + + +class MixinPlugin(p.SingletonPlugin): + p.implements(p.IBlueprint) + p.implements(p.IClick) + + # IBlueprint + + def get_blueprint(self): + return views.get_blueprints() + + # IClick + + def get_commands(self): + return cli.get_commands() diff --git a/ckanext/qa/plugin/pylons_plugin.py b/ckanext/qa/plugin/pylons_plugin.py new file mode 100644 index 00000000..560a4bae --- /dev/null +++ b/ckanext/qa/plugin/pylons_plugin.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- + +import ckan.plugins as p + + +class MixinPlugin(p.SingletonPlugin): + p.implements(p.IRoutes, inherit=True) + + # IRoutes + def before_map(self, map): + # Link checker - deprecated + res = 'ckanext.qa.controllers:LinkCheckerController' + map.connect('qa_resource_checklink', '/qa/link_checker', + conditions=dict(method=['GET']), + controller=res, + action='check_link') + return map diff --git a/ckanext/qa/sniff_format.py b/ckanext/qa/sniff_format.py index b22f9ad6..7bf3e2e8 100644 --- a/ckanext/qa/sniff_format.py +++ b/ckanext/qa/sniff_format.py @@ -1,17 +1,23 @@ +# -*- coding: utf-8 -*- + +from io import BytesIO, open +import sys import re import zipfile import os from collections import defaultdict import subprocess -import StringIO import xlrd import magic import messytables -import lib from ckan.lib import helpers as ckan_helpers + +if sys.version_info[0] >= 3: + unicode = str + import logging log = logging.getLogger(__name__) @@ -39,8 +45,8 @@ def sniff_file_format(filepath): mime_type = magic.from_file(filepath_utf8, mime=True) log.info('Magic detects file as: %s', mime_type) if mime_type: - if mime_type == 'application/xml': - with open(filepath) as f: + if mime_type in ('application/xml', 'text/xml'): + with open(filepath, 'r', encoding='ISO-8859-1') as f: buf = f.read(5000) format_ = get_xml_variant_including_xml_declaration(buf) elif mime_type == 'application/zip': @@ -59,15 +65,22 @@ def sniff_file_format(filepath): # e.g. Shapefile format_ = run_bsd_file(filepath) if not format_: - with open(filepath) as f: + with open(filepath, 'r', encoding='ISO-8859-1') as f: buf = f.read(500) format_ = is_html(buf) elif mime_type == 'text/html': # Magic can mistake IATI for HTML - with open(filepath) as f: + with open(filepath, 'r', encoding='ISO-8859-1') as f: buf = f.read(100) if is_iati(buf): format_ = {'format': 'IATI'} + elif mime_type == 'application/csv': + with open(filepath, 'r', encoding='ISO-8859-1', newline=None) as f: + buf = f.read(10000) + if is_csv(buf): + format_ = {'format': 'CSV'} + elif is_psv(buf): + format_ = {'format': 'PSV'} if format_: return format_ @@ -79,7 +92,7 @@ def sniff_file_format(filepath): if not format_: if mime_type.startswith('text/'): # is it JSON? - with open(filepath, 'rU') as f: + with open(filepath, 'r', encoding='ISO-8859-1', newline=None) as f: buf = f.read(10000) if is_json(buf): format_ = {'format': 'JSON'} @@ -99,7 +112,7 @@ def sniff_file_format(filepath): if format_['format'] == 'TXT': # is it JSON? - with open(filepath, 'rU') as f: + with open(filepath, 'r', encoding='ISO-8859-1', newline=None) as f: buf = f.read(10000) if is_json(buf): format_ = {'format': 'JSON'} @@ -116,7 +129,7 @@ def sniff_file_format(filepath): elif format_['format'] == 'HTML': # maybe it has RDFa in it - with open(filepath) as f: + with open(filepath, 'r', encoding='ISO-8859-1') as f: buf = f.read(100000) if has_rdfa(buf): format_ = {'format': 'RDFa'} @@ -202,14 +215,14 @@ def is_json(buf): def is_csv(buf): '''If the buffer is a CSV file then return True.''' - buf_rows = StringIO.StringIO(buf) + buf_rows = BytesIO(buf.encode('ISO-8859-1')) table_set = messytables.CSVTableSet(buf_rows) return _is_spreadsheet(table_set, 'CSV') def is_psv(buf): '''If the buffer is a PSV file then return True.''' - buf_rows = StringIO.StringIO(buf) + buf_rows = BytesIO(buf.encode('ISO-8859-1')) table_set = messytables.CSVTableSet(buf_rows, delimiter='|') return _is_spreadsheet(table_set, 'PSV') @@ -320,10 +333,10 @@ def start_element(name, attrs): p = xml.parsers.expat.ParserCreate() p.StartElementHandler = start_element try: - p.Parse(buf) - except GotFirstTag, e: + p.Parse(buf.encode('ISO-8859-1')) + except GotFirstTag as e: top_level_tag_name = str(e).lower() - except xml.sax.SAXException, e: + except xml.sax.SAXException as e: log.info('Sax parse error: %s %s', e, buf) return {'format': 'XML'} @@ -375,6 +388,7 @@ def get_zipped_format(filepath): '''For a given zip file, return the format of file inside. For multiple files, choose by the most open, and then by the most popular extension.''' + from ckanext.qa.lib import resource_format_scores # just check filename extension of each file inside try: # note: Cannot use "with" with a zipfile before python 2.7 @@ -384,11 +398,11 @@ def get_zipped_format(filepath): filepaths = zip.namelist() finally: zip.close() - except zipfile.BadZipfile, e: + except zipfile.BadZipfile as e: log.info('Zip file open raised error %s: %s', e, e.args) return - except Exception, e: + except Exception as e: log.warning('Zip file open raised exception %s: %s', e, e.args) return @@ -413,7 +427,7 @@ def get_zipped_format(filepath): extension = os.path.splitext(filepath)[-1][1:].lower() format_tuple = ckan_helpers.resource_formats().get(extension) if format_tuple: - score = lib.resource_format_scores().get(format_tuple[1]) + score = resource_format_scores().get(format_tuple[1]) if score is not None and score > top_score: top_score = score top_scoring_extension_counts = defaultdict(int) @@ -441,7 +455,7 @@ def get_zipped_format(filepath): def is_excel(filepath): try: xlrd.open_workbook(filepath) - except Exception, e: + except Exception as e: log.info('Not Excel - failed to load: %s %s', e, e.args) return False else: @@ -468,7 +482,7 @@ def run_bsd_file(filepath): '''Run the BSD command-line tool "file" to determine file type. Returns a format dict or None if it fails.''' result = check_output(['file', filepath]) - match = re.search('Name of Creating Application: ([^,]*),', result) + match = re.search(b'Name of Creating Application: ([^,]*),', result) if match: app_name = match.groups()[0] format_map = {'Microsoft Office PowerPoint': 'ppt', @@ -484,7 +498,7 @@ def run_bsd_file(filepath): log.info('"file" detected file format: %s', format_tuple[2]) return {'format': format_tuple[1]} - match = re.search(': ESRI Shapefile', result) + match = re.search(b': ESRI Shapefile', result) if match: format_ = {'format': 'SHP'} log.info('"file" detected file format: %s', diff --git a/ckanext/qa/tasks.py b/ckanext/qa/tasks.py index c3ba6865..33aac4e9 100644 --- a/ckanext/qa/tasks.py +++ b/ckanext/qa/tasks.py @@ -2,26 +2,26 @@ Provide some Quality Assurance by scoring datasets against Sir Tim Berners-Lee\'s five stars of openness ''' +import sys import datetime import json import os import traceback -import urlparse -import routes from ckan.common import _ -from ckan.lib import i18n from ckan.plugins import toolkit import ckan.lib.helpers as ckan_helpers -from sniff_format import sniff_file_format -import lib +from ckanext.qa.sniff_format import sniff_file_format from ckanext.archiver.model import Archival, Status import logging log = logging.getLogger(__name__) +if sys.version_info[0] >= 3: + unicode = str + if toolkit.check_ckan_version(max_version='2.6.99'): from ckan.lib import celery_app @@ -49,76 +49,17 @@ class QAError(Exception): } -def register_translator(): - # Register a translator in this thread so that - # the _() functions in logic layer can work - from paste.registry import Registry - from pylons import translator - from ckan.lib.cli import MockTranslator - global registry - registry = Registry() - registry.prepare() - global translator_obj - translator_obj = MockTranslator() - registry.register(translator, translator_obj) - - -def load_config(ckan_ini_filepath): - import paste.deploy - config_abs_path = os.path.abspath(ckan_ini_filepath) - conf = paste.deploy.appconfig('config:' + config_abs_path) - import ckan - ckan.config.environment.load_environment(conf.global_conf, - conf.local_conf) - - # give routes enough information to run url_for - parsed = urlparse.urlparse(conf.get('ckan.site_url', 'http://0.0.0.0')) - request_config = routes.request_config() - request_config.host = parsed.netloc + parsed.path - request_config.protocol = parsed.scheme - - load_translations(conf.get('ckan.locale_default', 'en')) - - -def load_translations(lang): - # Register a translator in this thread so that - # the _() functions in logic layer can work - from paste.registry import Registry - from pylons import translator - from pylons import request - registry = Registry() - registry.prepare() - - class FakePylons: - translator = None - - fakepylons = FakePylons() - - class FakeRequest: - # Stores details of the translator - environ = {'pylons.pylons': fakepylons} - - registry.register(request, FakeRequest()) - - # create translator - i18n.set_lang(lang) - - # pull out translator and register it - registry.register(translator, fakepylons.translator) - - -def update_package(ckan_ini_filepath, package_id): +def update_package(package_id): """ Given a package, calculates an openness score for each of its resources. It is more efficient to call this than 'update' for each resource. Returns None """ - load_config(ckan_ini_filepath) try: update_package_(package_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_package: %s: %s', e.__class__.__name__, unicode(e)) raise @@ -145,7 +86,7 @@ def update_package_(package_id): _update_search_index(package.id) -def update(ckan_ini_filepath, resource_id): +def update(resource_id): """ Given a resource, calculates an openness score. @@ -154,10 +95,9 @@ def update(ckan_ini_filepath, resource_id): 'openness_score': score (int) 'openness_score_reason': the reason for the score (string) """ - load_config(ckan_ini_filepath) try: update_resource_(resource_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_resource: %s: %s', e.__class__.__name__, unicode(e)) raise @@ -227,8 +167,6 @@ def resource_score(resource): score_reason = '' format_ = None - register_translator() - try: score_reasons = [] # a list of strings detailing how we scored it archival = Archival.get_for_resource(resource_id=resource.id) @@ -256,7 +194,7 @@ def resource_score(resource): format_ = get_qa_format(resource.id) score_reason = ' '.join(score_reasons) format_ = format_ or None - except Exception, e: + except Exception as e: log.error('Unexpected error while calculating openness score %s: %s\nException: %s', e.__class__.__name__, unicode(e), traceback.format_exc()) score_reason = _("Unknown error: %s") % str(e) @@ -352,6 +290,7 @@ def score_by_sniffing_data(archival, resource, score_reasons): * If it cannot work out the format then format_string is None * If it cannot score it, then score is None ''' + from ckanext.qa.lib import resource_format_scores if not archival or not archival.cache_filepath: score_reasons.append(_('This file had not been downloaded at the time of scoring it.')) return (None, None) @@ -363,7 +302,7 @@ def score_by_sniffing_data(archival, resource, score_reasons): else: if filepath: sniffed_format = sniff_file_format(filepath) - score = lib.resource_format_scores().get(sniffed_format['format']) \ + score = resource_format_scores().get(sniffed_format['format']) \ if sniffed_format else None if sniffed_format: score_reasons.append(_('Content of file appeared to be format "%s" which receives openness score: %s.') @@ -401,6 +340,7 @@ def score_by_url_extension(resource, score_reasons): * If it cannot work out the format then format is None * If it cannot score it, then score is None ''' + from ckanext.qa.lib import resource_format_scores extension_variants_ = extension_variants(resource.url.strip()) if not extension_variants_: score_reasons.append(_('Could not determine a file extension in the URL.')) @@ -408,7 +348,7 @@ def score_by_url_extension(resource, score_reasons): for extension in extension_variants_: format_ = format_get(extension) if format_: - score = lib.resource_format_scores().get(format_) + score = resource_format_scores().get(format_) if score: score_reasons.append( _('URL extension "%s" relates to format "%s" and receives score: %s.') % (extension, format_, score)) @@ -454,16 +394,17 @@ def score_by_format_field(resource, score_reasons): * If it cannot work out the format then format_string is None * If it cannot score it, then score is None ''' + from ckanext.qa.lib import resource_format_scores, munge_format_to_be_canonical format_field = resource.format or '' if not format_field: score_reasons.append(_('Format field is blank.')) return (None, None) format_tuple = ckan_helpers.resource_formats().get(format_field.lower()) or \ - ckan_helpers.resource_formats().get(lib.munge_format_to_be_canonical(format_field)) + ckan_helpers.resource_formats().get(munge_format_to_be_canonical(format_field)) if not format_tuple: score_reasons.append(_('Format field "%s" does not correspond to a known format.') % format_field) return (None, None) - score = lib.resource_format_scores().get(format_tuple[1]) + score = resource_format_scores().get(format_tuple[1]) score_reasons.append(_('Format field "%s" receives score: %s.') % (format_field, score)) return (score, format_tuple[1]) diff --git a/ckanext/qa/tests/__init__.py b/ckanext/qa/tests/__init__.py index c86805e2..e69de29b 100644 --- a/ckanext/qa/tests/__init__.py +++ b/ckanext/qa/tests/__init__.py @@ -1,12 +0,0 @@ -import mock -from ckan.lib.cli import MockTranslator - - -def setup(): - # Register a mock translator instead of having ckan domain translations defined - patcher = mock.patch('pylons.i18n.translation._get_translator', return_value=MockTranslator()) - patcher.start() - - -def teardown(): - mock.patch.stopall() diff --git a/ckanext/qa/tests/fixtures.py b/ckanext/qa/tests/fixtures.py new file mode 100644 index 00000000..7f78536c --- /dev/null +++ b/ckanext/qa/tests/fixtures.py @@ -0,0 +1,27 @@ +import pytest +import os +import threading +from ckanext.archiver.tests.mock_flask_server import create_app + + +@pytest.fixture(scope='session', autouse=True) +def client(): + app = create_app() + port = 9091 + thread = threading.Thread(target=lambda: app.run(debug=True, port=port, use_reloader=False)) + thread.daemon = True + thread.start() + + yield "http://127.0.0.1:" + str(port) + + +@pytest.fixture(scope='class') +def files(): + fixture_data_dir = os.path.join(os.path.dirname(__file__), 'data') + files = [] + for filename in os.listdir(fixture_data_dir): + format_extension = '.'.join(filename.split('.')[1:]).replace('_', ' ') + filepath = os.path.join(fixture_data_dir, filename) + files.append((format_extension, filepath)) + + yield files diff --git a/ckanext/qa/tests/mock_flask_server.py b/ckanext/qa/tests/mock_flask_server.py new file mode 100644 index 00000000..15e13b58 --- /dev/null +++ b/ckanext/qa/tests/mock_flask_server.py @@ -0,0 +1,44 @@ +import os +from flask import Flask, request, make_response + + +def create_app(): + app = Flask(__name__) + + @app.route('/', defaults={"path": ""}) + @app.route('/') + def echo(path): + status = int(request.args.get('status', 200)) + + content = request.args.get('content', '') + + if 'content_long' in request.args: + content = '*' * 1000001 + + response = make_response(content, status) + + headers = [ + item + for item in list(request.args.items()) + if item[0] not in ('content', 'status') + ] + + if 'length' in request.args: + cl = request.args.get('length') + headers += [('Content-Length', cl)] + elif content and 'no-content-length' not in request.args: + headers += [('Content-Length', bytes(len(content)))] + + for k, v in headers: + response.headers[k] = v + + return response + + return app + + +def get_file_content(data_filename): + filepath = os.path.join(os.path.dirname(__file__), 'data', data_filename) + assert os.path.exists(filepath), filepath + with open(filepath, 'rb') as f: + return f.read() diff --git a/ckanext/qa/tests/mock_remote_server.py b/ckanext/qa/tests/mock_remote_server.py index cd1274bf..e4ea7c81 100644 --- a/ckanext/qa/tests/mock_remote_server.py +++ b/ckanext/qa/tests/mock_remote_server.py @@ -2,14 +2,22 @@ An HTTP server that listens on localhost and returns a variety of responses for mocking remote servers. """ +import sys from contextlib import contextmanager from threading import Thread from time import sleep from wsgiref.simple_server import make_server -from functools import reduce -import urllib2 -import socket import six +import socket +from six.moves import reduce + +try: + from urllib2 import urlopen +except ImportError: + from urllib.request import urlopen + +if sys.version_info[0] >= 3: + unicode = str class MockHTTPServer(object): @@ -21,7 +29,7 @@ class MockHTTPServer(object): a separate thread, eg:: >>> with MockTestServer().serve() as server_address: - ... urllib2.urlopen(server_address) + ... urlopen(server_address) ... Subclass this and override __call__ to provide your own WSGI handler function. @@ -39,7 +47,7 @@ def serve(self, host='localhost', port_range=(8000, 9000)): This uses context manager to make sure the server is stopped:: >>> with MockTestServer().serve() as addr: - ... print urllib2.urlopen('%s/?content=hello+world').read() + ... print urlopen('%s/?content=hello+world').read() ... 'hello world' """ @@ -70,7 +78,7 @@ def _serve_until_stopped(): # call completes. Set a very small timeout as we don't actually need to # wait for a response. We don't care about exceptions here either. try: - urllib2.urlopen("http://%s:%s/" % (host, port), timeout=0.01) + urlopen("http://%s:%s/" % (host, port), timeout=0.01) except Exception: pass diff --git a/ckanext/qa/tests/test_link_checker.py b/ckanext/qa/tests/test_link_checker.py index d04959d0..eeec2a72 100644 --- a/ckanext/qa/tests/test_link_checker.py +++ b/ckanext/qa/tests/test_link_checker.py @@ -1,19 +1,15 @@ +import pytest import logging -from functools import wraps import json -from urllib import urlencode +from nose.tools import assert_in + try: - from ckan.tests.helpers import assert_in - from ckan.tests.legacy import TestController as ControllerTestCase + from urllib import urlencode except ImportError: - from ckan.tests import assert_in - from ckan.tests import TestController as ControllerTestCase -from nose.tools import assert_equal + from urllib.parse import urlencode from ckanext.archiver.tasks import update_package -from mock_remote_server import MockEchoTestServer - # enable celery logging for when you run nosetests -s log = logging.getLogger('ckanext.archiver.tasks') @@ -25,86 +21,88 @@ def get_logger(): update_package.get_logger = get_logger -def with_mock_url(url=''): - """ - Start a MockEchoTestServer call the decorated function with the server's address prepended to ``url``. - """ - def decorator(func): - @wraps(func) - def decorated(*args, **kwargs): - with MockEchoTestServer().serve() as serveraddr: - return func(*(args + ('%s/%s' % (serveraddr, url),)), **kwargs) - return decorated - return decorator - - -class TestLinkChecker(ControllerTestCase): +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'archiver qa') +class TestLinkChecker(object): """ Tests for link checker task """ - def check_link(self, url): - result = self.app.get('/qa/link_checker?%s' % urlencode({'url': url})) + + def check_link(self, url, base_url, app): + base_url = base_url + '/' if base_url is not None else '' + result = app.get('/qa/link_checker?%s' % urlencode({'url': base_url + url})) return json.loads(result.body)[0] - @with_mock_url('?status=200') - def test_url_working_but_formatless(self, url): - result = self.check_link(url) - assert_equal(result['format'], None) + def test_url_working_but_formatless(self, client, app): + url = '?status=200' + result = self.check_link(url, client, app) + assert result['format'] is None - @with_mock_url('file.csv') - def test_format_by_url_extension(self, url): - result = self.check_link(url) - assert_equal(result['format'], 'CSV') + def test_format_by_url_extension(self, client, app): + url = 'file.csv' + result = self.check_link(url, client, app) + assert result['format'] == 'CSV' - @with_mock_url('file.csv.zip') - def test_format_by_url_extension_zipped(self, url): - result = self.check_link(url) - assert_equal(result['format'], 'CSV / ZIP') + def test_format_by_url_extension_zipped(self, client, app): + url = 'file.csv.zip' + result = self.check_link(url, client, app) + assert result['format'] == 'CSV / ZIP' - @with_mock_url('file.f1.f2') - def test_format_by_url_extension_unknown(self, url): - result = self.check_link(url) - assert_equal(result['format'], 'F1 / F2') + def test_format_by_url_extension_unknown(self, client, app): + url = 'file.f1.f2' + result = self.check_link(url, client, app) + assert result['format'] == 'F1 / F2' - def test_encoded_url(self): + def test_encoded_url(self, client, app): # This is not actually a URL, and the encoded letters get # interpreted as being in the hostname. But should not cause # an exception. url = 'Over+\xc2\xa325,000+expenditure+report+April-13' - result = self.check_link(url) - assert_equal(result['format'], '') + result = self.check_link(url, client, app) + assert result['format'] is None - @with_mock_url('?status=200;content-type=text/plain') - def test_format_by_mimetype_txt(self, url): - result = self.check_link(url) - assert_equal(result['format'], 'TXT') + def test_format_by_mimetype_txt(self, client, app): + url = '?status=200&content-type=text/plain' + result = self.check_link(url, client, app) + assert result['format'] == 'TXT' - @with_mock_url('?status=200;content-type=text/csv') - def test_format_by_mimetype_csv(self, url): - result = self.check_link(url) - assert_equal(result['format'], 'CSV') + def test_format_by_mimetype_csv(self, client, app): + url = '?status=200&content-type=text/csv' + result = self.check_link(url, client, app) + assert result['format'] == 'CSV' - def test_file_url(self): + def test_file_url(self, client, app): url = u'file:///home/root/test.txt' - result = self.check_link(url) - assert_in(u'Invalid url scheme. Please use one of: ftp http https', - result['url_errors']) - # assert_raises(LinkCheckerError, link_checker, context, data) - - def test_empty_url(self): + result = self.check_link(url, None, app) + + format_in_use = None + # htt/https/ftp comes in random order in url_errors so check if any possible format is used in url_error + schemes = [u'http ftp https', u'http https ftp', u'https ftp http', u'https http ftp', u'ftp https http', + u'ftp http https'] + for scheme in schemes: + if u'Invalid url scheme. Please use one of: %s' % scheme in result['url_errors']: + format_in_use = u'Invalid url scheme. Please use one of: %s' % scheme + break + + if format_in_use: + assert_in(format_in_use, result['url_errors']) + else: + pytest.fail("Link check failed {}".format(result['url_errors'])) + + def test_empty_url(self, client, app): url = u'' - result = self.check_link(url) + result = self.check_link(url, None, app) assert_in("URL parsing failure - did not find a host name", result['url_errors']) - @with_mock_url('?status=503') - def test_url_with_503(self, url): - result = self.check_link(url) - assert_in('Server returned HTTP error status: 503 Service Unavailable', result['url_errors']) + def test_url_with_503(self, client, app): + url = '?status=503' + result = self.check_link(url, client, app) + assert_in('Server returned HTTP error status: 503 SERVICE UNAVAILABLE', result['url_errors']) - @with_mock_url('?status=404') - def test_url_with_404(self, url): - result = self.check_link(url) - assert_in('Server returned HTTP error status: 404 Not Found', result['url_errors']) + def test_url_with_404(self, client, app): + url = '?status=404' + result = self.check_link(url, client, app) + assert_in('Server returned HTTP error status: 404 NOT FOUND', result['url_errors']) # Disabled as doesn't work # @with_mock_url('') @@ -113,21 +111,21 @@ def test_url_with_404(self, url): # url += u'?status=301&location=%s' % quote_plus(redirect_url) # result = self.check_link(url) # # The redirect works and the CSV is picked up - # assert_equal(result['format'], 'CSV') + # assert(result['format'], 'CSV') # e.g. "http://www.dasa.mod.uk/applications/newWeb/www/index.php?page=48 # &thiscontent=180&date=2011-05-26&pubType=1&PublishTime=09:30:00&from=home&tabOption=1" - @with_mock_url('?time=09:30&status=200') - def test_colon_in_query_string(self, url): + def test_colon_in_query_string(self, client, app): + url = '?time=09:30&status=200' # accept, because browsers accept this # see discussion: http://trac.ckan.org/ticket/318 - result = self.check_link(url) - print result - assert_equal(result['url_errors'], []) + result = self.check_link(url, client, app) + print(result) + assert result['url_errors'] == [] - @with_mock_url('?status=200 ') - def test_trailing_whitespace(self, url): + def test_trailing_whitespace(self, client, app): + url = '?status=200 ' # accept, because browsers accept this - result = self.check_link(url) - print result - assert_equal(result['url_errors'], []) + result = self.check_link(url, client, app) + print(result) + assert result['url_errors'] == [] diff --git a/ckanext/qa/tests/test_sniff_format.py b/ckanext/qa/tests/test_sniff_format.py index f7b86577..3cc24f9f 100644 --- a/ckanext/qa/tests/test_sniff_format.py +++ b/ckanext/qa/tests/test_sniff_format.py @@ -1,9 +1,7 @@ import os +import pytest import logging -from nose.tools import assert_equal -from nose.plugins.skip import SkipTest - from ckan import plugins as p from ckanext.qa.sniff_format import sniff_file_format, is_json, is_ttl, turtle_regex @@ -12,6 +10,7 @@ log = logging.getLogger('ckan.sniff') +@pytest.mark.usefixtures('files') class TestSniffFormat: @classmethod def setup_class(cls): @@ -32,14 +31,14 @@ def assert_file_has_format_sniffed_correctly(cls, format_extension, filepath): sniffed_format = sniff_file_format(filepath) assert sniffed_format, expected_format expected_format_without_zip = expected_format.replace('.zip', '') - assert_equal(sniffed_format['format'].lower(), expected_format_without_zip) + assert sniffed_format['format'].lower() == expected_format_without_zip expected_container = None if expected_format.endswith('.zip'): expected_container = 'ZIP' elif expected_format.endswith('.gzip'): expected_container = 'ZIP' # lumped together with zip for simplicity now - assert_equal(sniffed_format.get('container'), expected_container) + assert sniffed_format.get('container') == expected_container # def test_all(self): # for format_extension, filepath in self.fixture_files: @@ -104,7 +103,7 @@ def test_odt(self): def test_odp(self): if p.toolkit.check_ckan_version(max_version='2.3.99'): - raise SkipTest + pytest.skip("Test only on version >2.5.99") self.check_format('odp') def test_ppt(self): @@ -205,7 +204,8 @@ def test_wfs_2_0(self): def test_wmts(self): if p.toolkit.check_ckan_version(max_version='2.5.99'): - raise SkipTest + pytest.skip("Test only on version >2.5.99") + self.check_format('wmts', 'ukho_bathymetry.wmts') def test_wcs(self): diff --git a/ckanext/qa/tests/test_tasks.py b/ckanext/qa/tests/test_tasks.py index 4a9bf3bd..8e6faf19 100644 --- a/ckanext/qa/tests/test_tasks.py +++ b/ckanext/qa/tests/test_tasks.py @@ -1,22 +1,14 @@ +import pytest import requests import logging import urllib import datetime -from nose.tools import assert_equal -from nose.plugins.skip import SkipTest from ckan import model from ckan.logic import get_action from ckan import plugins as p import ckan.lib.helpers as ckan_helpers -try: - from ckan.tests.helpers import reset_db - from ckan.tests import factories as ckan_factories - from ckan.tests.legacy import BaseCase -except ImportError: - from ckan.new_tests.helpers import reset_db - from ckan.new_tests import factories as ckan_factories - from ckan.tests import BaseCase +from ckantoolkit.tests import factories as ckan_factories import ckanext.qa.tasks from ckanext.qa.tasks import resource_score, extension_variants @@ -65,11 +57,12 @@ def set_sniffed_format(format_name): TODAY_STR = TODAY.isoformat() -class TestTask(BaseCase): - - @classmethod - def setup_class(cls): - reset_db() +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'qa archiver report') +class TestTask(): + @pytest.fixture(autouse=True) + @pytest.mark.usefixtures('clean_db') + def init_data(cls, clean_db): archiver_model.init_tables(model.meta.engine) qa_model.init_tables(model.meta.engine) @@ -100,11 +93,12 @@ def test_trigger_on_archival(cls): # TODO run celery and check it actually ran... -class TestResourceScore(BaseCase): - - @classmethod - def setup_class(cls): - reset_db() +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'qa archiver report') +class TestResourceScore(): + @pytest.fixture(autouse=True) + @pytest.mark.usefixtures('clean_db') + def init_data(cls, clean_db): archiver_model.init_tables(model.meta.engine) qa_model.init_tables(model.meta.engine) cls.fake_resource = { @@ -175,7 +169,7 @@ def test_by_extension(self): result = resource_score(self._test_resource('http://site.com/filename.xls')) assert result['openness_score'] == 2, result assert result['archival_timestamp'] == TODAY_STR, result - assert_equal(result['format'], 'XLS') + assert result['format'] == 'XLS' assert 'not recognized from its contents' in result['openness_score_reason'], result assert 'extension "xls" relates to format "XLS"' in result['openness_score_reason'], result @@ -190,7 +184,7 @@ def test_by_format_field(self): set_sniffed_format(None) result = resource_score(self._test_resource(format='XLS')) assert result['openness_score'] == 2, result - assert_equal(result['format'], 'XLS') + assert result['format'] == 'XLS' assert 'not recognized from its contents' in result['openness_score_reason'], result assert 'Could not determine a file extension in the URL' in result['openness_score_reason'], result assert 'Format field "XLS"' in result['openness_score_reason'], result @@ -198,9 +192,9 @@ def test_by_format_field(self): def test_by_format_field_excel(self): set_sniffed_format(None) if p.toolkit.check_ckan_version(max_version='2.4.99'): - raise SkipTest + pytest.skip("Test only on version >2.4.99") result = resource_score(self._test_resource(format='Excel')) - assert_equal(result['format'], 'XLS') + assert result['format'] == 'XLS' def test_format_field_not_recognized(self): set_sniffed_format(None) @@ -222,7 +216,7 @@ def test_available_but_not_open(self): set_sniffed_format('CSV') result = resource_score(self._test_resource(license_id=None)) assert result['openness_score'] == 0, result - assert_equal(result['format'], 'CSV') + assert result['format'] == 'CSV' assert 'License not open' in result['openness_score_reason'], result def test_not_available_and_not_open(self): @@ -237,12 +231,12 @@ def test_not_available_and_not_open(self): model.Session.commit() result = resource_score(res) assert result['openness_score'] == 0, result - assert_equal(result['format'], None) + assert result['format'] is None # in preference it should report that it is not available - assert_equal(result['openness_score_reason'], u'File could not be downloaded. ' - u'Reason: Download error. Error details: Server returned 500 error.' - u' Attempted on 10/10/2008. Tried 16 times since 01/10/2008.' - u' This URL has not worked in the history of this tool.') + assert result['openness_score_reason'] == (u'File could not be downloaded. ' + u'Reason: Download error. Error details: Server returned 500 error.' + u' Attempted on 10/10/2008. Tried 16 times since 01/10/2008.' + u' This URL has not worked in the history of this tool.') def test_not_available_any_more(self): # A cache of the data still exists from the previous run, but this @@ -265,35 +259,33 @@ def test_not_available_any_more(self): archival.is_broken = True result = resource_score(res) assert result['openness_score'] == 0, result - assert_equal(result['format'], 'CSV') + assert result['format'] == 'CSV' # in preference it should report that it is not available - assert_equal(result['openness_score_reason'], 'File could not be downloaded. ' - 'Reason: Download error. Error details: Server returned 404 error.' - ' Attempted on 10/10/2008. This URL last worked on: 01/10/2008.') + assert result['openness_score_reason'] == ('File could not be downloaded. ' + 'Reason: Download error. Error details: Server returned 404 error.' + ' Attempted on 10/10/2008. This URL last worked on: 01/10/2008.') -class TestExtensionVariants: +class TestExtensionVariants(): def test_0_normal(self): - assert_equal(extension_variants('http://dept.gov.uk/coins-data-1996.csv'), - ['csv']) + assert extension_variants('http://dept.gov.uk/coins-data-1996.csv') == ['csv'] def test_1_multiple(self): - assert_equal(extension_variants('http://dept.gov.uk/coins.data.1996.csv.zip'), - ['csv.zip', 'zip']) + assert extension_variants('http://dept.gov.uk/coins.data.1996.csv.zip') == ['csv.zip', 'zip'] def test_2_parameter(self): - assert_equal(extension_variants('http://dept.gov.uk/coins-data-1996.csv?callback=1'), - ['csv']) + assert extension_variants('http://dept.gov.uk/coins-data-1996.csv?callback=1') == ['csv'] def test_3_none(self): - assert_equal(extension_variants('http://dept.gov.uk/coins-data-1996'), - []) + assert extension_variants('http://dept.gov.uk/coins-data-1996') == [] -class TestSaveQaResult(object): - @classmethod - def setup_class(cls): - reset_db() +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'qa archiver report') +class TestSaveQaResult(): + @pytest.fixture(autouse=True) + @pytest.mark.usefixtures('clean_db') + def init_data(cls, clean_db): archiver_model.init_tables(model.meta.engine) qa_model.init_tables(model.meta.engine) @@ -315,18 +307,19 @@ def test_simple(self): qa = ckanext.qa.tasks.save_qa_result(resource, qa_result) - assert_equal(qa.openness_score, qa_result['openness_score']) - assert_equal(qa.openness_score_reason, - qa_result['openness_score_reason']) - assert_equal(qa.format, qa_result['format']) - assert_equal(qa.archival_timestamp, qa_result['archival_timestamp']) - assert qa.updated, qa.updated + assert qa.openness_score == qa_result['openness_score'] + assert qa.openness_score_reason == qa_result['openness_score_reason'] + assert qa.format == qa_result['format'] + assert qa.archival_timestamp == qa_result['archival_timestamp'] + assert qa.updated == qa.updated -class TestUpdatePackage(object): - @classmethod - def setup_class(cls): - reset_db() +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'qa archiver report') +class TestUpdatePackage(): + @pytest.fixture(autouse=True) + @pytest.mark.usefixtures('clean_db') + def init_data(cls, clean_db): archiver_model.init_tables(model.meta.engine) qa_model.init_tables(model.meta.engine) @@ -343,14 +336,16 @@ def test_simple(self): qa = qa_model.QA.get_for_resource(dataset['resources'][0]['id']) assert qa - assert_equal(qa.openness_score, 0) - assert_equal(qa.openness_score_reason, 'License not open') + assert qa.openness_score == 0 + assert qa.openness_score_reason == 'License not open' -class TestUpdateResource(object): - @classmethod - def setup_class(cls): - reset_db() +@pytest.mark.usefixtures('with_plugins') +@pytest.mark.ckan_config('ckan.plugins', 'qa archiver report') +class TestUpdateResource(): + @pytest.fixture(autouse=True) + @pytest.mark.usefixtures('clean_db') + def init_data(cls, clean_db): archiver_model.init_tables(model.meta.engine) qa_model.init_tables(model.meta.engine) @@ -367,5 +362,5 @@ def test_simple(self): qa = qa_model.QA.get_for_resource(dataset['resources'][0]['id']) assert qa - assert_equal(qa.openness_score, 0) - assert_equal(qa.openness_score_reason, 'License not open') + assert qa.openness_score == 0 + assert qa.openness_score_reason == 'License not open' diff --git a/ckanext/qa/utils.py b/ckanext/qa/utils.py new file mode 100644 index 00000000..fb5c00b6 --- /dev/null +++ b/ckanext/qa/utils.py @@ -0,0 +1,195 @@ +import sys +from sqlalchemy import or_ +import six +import logging +log = logging.getLogger(__name__) + + +def init_db(): + import ckan.model as model + from ckanext.qa.model import init_tables + init_tables(model.meta.engine) + + +def update(ids, queue): + from ckan import model + from ckanext.qa import lib + packages = [] + resources = [] + if len(ids) > 0: + for id in ids: + # try id as a group id/name + group = model.Group.get(id) + if group and group.is_organization: + # group.packages() is unreliable for an organization - + # member objects are not definitive whereas owner_org, so + # get packages using owner_org + query = model.Session.query(model.Package)\ + .filter( + or_(model.Package.state == 'active', + model.Package.state == 'pending'))\ + .filter_by(owner_org=group.id) + packages.extend(query.all()) + if not queue: + queue = 'bulk' + continue + elif group: + packages.extend(group.packages()) + if not queue: + queue = 'bulk' + continue + # try id as a package id/name + pkg = model.Package.get(id) + if pkg: + packages.append(pkg) + if not queue: + queue = 'priority' + continue + # try id as a resource id + res = model.Resource.get(id) + if res: + resources.append(res) + if not queue: + queue = 'priority' + continue + else: + log.error('Could not recognize as a group, package ' + 'or resource: %r', id) + sys.exit(1) + else: + # all packages + pkgs = model.Session.query(model.Package)\ + .filter_by(state='active')\ + .order_by('name').all() + packages.extend(pkgs) + if not queue: + queue = 'bulk' + + if packages: + log.info('Datasets to QA: %d', len(packages)) + if resources: + log.info('Resources to QA: %d', len(resources)) + if not (packages or resources): + log.error('No datasets or resources to process') + sys.exit(1) + + log.info('Queue: %s', queue) + for package in packages: + lib.create_qa_update_package_task(package, queue) + log.info('Queuing dataset %s (%s resources)', + package.name, len(package.resources)) + + for resource in resources: + package = resource.resource_group.package + log.info('Queuing resource %s/%s', package.name, resource.id) + lib.create_qa_update_task(resource, queue) + + log.info('Completed queueing') + + +def sniff(filepaths): + from ckanext.qa.sniff_format import sniff_file_format + + for filepath in filepaths: + format_ = sniff_file_format( + filepath) + if format_: + print('Detected as: %s - %s' % (format_['display_name'], + filepath)) + else: + print('ERROR: Could not recognise format of: %s' % filepath) + + +def view(package_ref=None): + from ckan import model + + q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') + print('QA records - %i TaskStatus rows' % q.count()) + print(' across %i Resources' % q.distinct('entity_id').count()) + + if package_ref: + pkg = model.Package.get(package_ref) + print('Package %s %s' % (pkg.name, pkg.id)) + for res in pkg.resources: + print('Resource %s' % res.id) + for row in q.filter_by(entity_id=res.id): + print('* %s = %r error=%r' % (row.key, row.value, + row.error)) + + +def clean(): + from ckan import model + + print('Before:') + view() + + q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') + q.delete() + model.Session.commit() + + print('After:') + view() + + +def migrate1(): + from ckan import model + from ckan.lib.helpers import json + + q_status = model.Session.query(model.TaskStatus) \ + .filter_by(task_type='qa') \ + .filter_by(key='status') + print('* %s with "status" will be deleted e.g. %s' % (q_status.count(), + q_status.first())) + q_failures = model.Session.query(model.TaskStatus) \ + .filter_by(task_type='qa') \ + .filter_by(key='openness_score_failure_count') + print('* %s with openness_score_failure_count to be deleted e.g.\n%s' + % (q_failures.count(), q_failures.first())) + q_score = model.Session.query(model.TaskStatus) \ + .filter_by(task_type='qa') \ + .filter_by(key='openness_score') + print('* %s with openness_score to migrate e.g.\n%s' % + (q_score.count(), q_score.first())) + q_reason = model.Session.query(model.TaskStatus) \ + .filter_by(task_type='qa') \ + .filter_by(key='openness_score_reason') + print('* %s with openness_score_reason to migrate e.g.\n%s' % + (q_reason.count(), q_reason.first())) + + six.moves.input('Press Enter to continue') + + q_status.delete() + model.Session.commit() + print('..."status" deleted') + + q_failures.delete() + model.Session.commit() + print('..."openness_score_failure_count" deleted') + + for task_status in q_score: + reason_task_status = q_reason \ + .filter_by(entity_id=task_status.entity_id) \ + .first() + if reason_task_status: + reason = reason_task_status.value + reason_task_status.delete() + else: + reason = None + + task_status.key = 'status' + task_status.error = json.dumps({ + 'reason': reason, + 'format': None, + 'is_broken': None, + }) + model.Session.commit() + print('..."openness_score" and "openness_score_reason" migrated') + + count = q_reason.count() + q_reason.delete() + model.Session.commit() + print('... %i remaining "openness_score_reason" deleted' % count) + + model.Session.flush() + model.Session.remove() + print('Migration succeeded') diff --git a/ckanext/qa/views.py b/ckanext/qa/views.py new file mode 100644 index 00000000..b8bcefd8 --- /dev/null +++ b/ckanext/qa/views.py @@ -0,0 +1,124 @@ +from flask import Blueprint +import json +import mimetypes +import posixpath +import sys +from ckan.plugins.toolkit import request +from ckanext.archiver.tasks import link_checker, LinkCheckerError +from ckan.lib import helpers as ckan_helpers +from ckan.lib.helpers import parse_rfc_2822_date + +if sys.version_info[0] >= 3: + from urllib.parse import urlparse +else: + from urlparse import urlparse + + +def qa_resource_checklink(): + urls = request.args.getlist('url') + result = [_check_link(url) for url in urls] + return json.dumps(result) + + +def _check_link(url): + """ + Synchronously check the given link, and return dict representing results. + Does not handle 30x redirects. + """ + + parsed_url = urlparse(url) + scheme = parsed_url.scheme + path = parsed_url.path + + # If a user enters "www.example.com" then we assume they meant "http://www.example.com" + if not scheme: + url = 'http://' + path + + context = {} + data = { + 'url_timeout': 10, + 'url': url + } + result = { + 'errors': [], + 'url_errors': [], + 'format': '', + 'mimetype': '', + 'size': '', + 'last_modified': '', + } + + try: + headers = json.loads(link_checker(json.dumps(context), json.dumps(data))) + result['format'] = _extract_file_format(url, headers) + result['mimetype'] = _extract_mimetype(headers) + result['size'] = headers.get('content-length', '') + result['last_modified'] = _parse_and_format_date(headers.get('last-modified', '')) + except LinkCheckerError as e: + result['url_errors'].append(str(e)) + return result + + +def _extract_file_format(url, headers): + """ + Makes a best guess at the file format. + + /path/to/a_file.csv has format "CSV" + /path/to/a_file.csv.zip has format "CSV / Zip" + + First this function tries to extract the file-extensions from the url, + and deduce the format from there. If no file-extension is found, then + the mimetype from the headers is passed to `mimetypes.guess_extension()`. + """ + formats = [] + parsed_url = urlparse(url) + path = parsed_url.path + base, extension = posixpath.splitext(path) + while extension: + formats.append(extension[1:].upper()) # strip leading '.' from extension + base, extension = posixpath.splitext(base) + if formats: + extension = '.'.join(formats[::-1]).lower() + format_tuple = ckan_helpers.resource_formats().get(extension) + if format_tuple: + return format_tuple[1] + return ' / '.join(formats[::-1]) + + # No file extension found, attempt to extract format using the mimetype + stripped_mimetype = _extract_mimetype(headers) # stripped of charset + format_tuple = ckan_helpers.resource_formats().get(stripped_mimetype) + if format_tuple: + return format_tuple[1] + + extension = mimetypes.guess_extension(stripped_mimetype) + if extension: + return extension[1:].upper() + + +def _extract_mimetype(headers): + """ + The Content-Type in headers, stripped of character encoding parameters. + """ + return headers.get('content-type', '').split(';')[0].strip() + + +def _parse_and_format_date(date_string): + """ + Parse date string in form specified in RFC 2822, and reformat to iso format. + + Returns the empty string if the date_string cannot be parsed + """ + dt = parse_rfc_2822_date(date_string) + + # Remove timezone information, adjusting as necessary. + if dt and dt.tzinfo: + dt = (dt - dt.utcoffset()).replace(tzinfo=None) + return dt.isoformat() if dt else '' + + +qa_blueprints = Blueprint('qa_blueprint', __name__) +qa_blueprints.add_url_rule('/qa/link_checker', view_func=qa_resource_checklink) + + +def get_blueprints(): + return [qa_blueprints] diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..37722bf0 --- /dev/null +++ b/conftest.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +pytest_plugins = [ + u'ckanext.qa.tests.fixtures' +] diff --git a/dev-requirements.txt b/dev-requirements.txt index ed77b3d6..9f86f04b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,6 @@ -nose +-e git+http://github.com/ckan/ckanext-report.git#egg=ckanext-report +-e git+http://github.com/ckan/ckanext-archiver.git#egg=ckanext-archiver mock flask +pytest-ckan +pytest-cov \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 70da4e40..8a13b5db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,10 @@ -xlrd==1.0.0 +ckantoolkit>=0.0.7 +SQLAlchemy>=0.6.6 +requests +progressbar +six>=1.9 # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 +xlrd==2.0.1 python-magic==0.4.12 messytables==0.15.2 -progressbar==2.3 +progressbar2==3.53.3 +future>=0.18.2 diff --git a/setup.py b/setup.py index 3bdf9296..4dc8523a 100644 --- a/setup.py +++ b/setup.py @@ -16,22 +16,7 @@ namespace_packages=['ckanext'], include_package_data=True, zip_safe=False, - install_requires=[ - 'ckanext-archiver>=2.0', - 'ckanext-report', - 'SQLAlchemy>=0.6.6', - 'requests', - 'xlrd>=0.8.0', - 'messytables>=0.8', - 'python-magic>=0.4', - 'progressbar', - 'six>=1.9' # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 - ], - tests_require=[ - 'nose', - 'mock', - 'flask' - ], + install_requires=[], entry_points=''' [paste.paster_command] qa=ckanext.qa.commands:QACommand diff --git a/test-core.ini b/test-core.ini deleted file mode 100644 index ad68a59c..00000000 --- a/test-core.ini +++ /dev/null @@ -1,63 +0,0 @@ -# -# ckan - Pylons testing environment configuration -# -# The %(here)s variable will be replaced with the parent directory of this file -# -[DEFAULT] -debug = true -# Uncomment and replace with the address which should receive any error reports -#email_to = you@yourdomain.com -smtp_server = localhost -error_email_from = paste@localhost - -[server:main] -use = egg:Paste#http -host = 0.0.0.0 -port = 5000 - - -[app:main] -use = config:../ckan/test-core.ini - -ckan.plugins = qa - -# Logging configuration -[loggers] -keys = root, ckan, ckanext, sqlalchemy - -[handlers] -keys = console - -[formatters] -keys = generic - -[logger_root] -level = WARN -handlers = console - -[logger_ckan] -qualname = ckan -handlers = console -level = INFO -propagate = 0 - -[logger_ckanext] -qualname = ckanext -handlers = console -level = DEBUG -propagate = 0 - -[logger_sqlalchemy] -handlers = -qualname = sqlalchemy.engine -level = WARN -propagate = 0 - -[handler_console] -class = StreamHandler -args = (sys.stdout,) -level = NOTSET -formatter = generic - -[formatter_generic] -format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s diff --git a/test.ini b/test.ini index 7c1094c4..32d25f17 100644 --- a/test.ini +++ b/test.ini @@ -17,7 +17,8 @@ port = 5000 [app:main] -use = config:test-core.ini +use = config:../ckan/test-core.ini +ckan.plugins = qa archiver report # Here we hard-code the database and a flag to make default tests # run fast. faster_db_test_hacks = True