diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 026326b..93f34aa 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - cove: [ 'oc4ids' , 'ocds' , 'bods'] + cove: [ 'oc4ids' , 'ocds' ] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v1 @@ -29,15 +29,8 @@ jobs: git checkout main cd .. git clone https://github.com/open-contracting/lib-cove-ocds.git - - - name: bods - if: matrix.cove == 'bods' - run: | - git clone https://github.com/openownership/cove-bods.git - cd cove-bods - git checkout master - cd .. - git clone https://github.com/openownership/lib-cove-bods.git + cd lib-cove-ocds + git checkout 0.11.3 - name: Install run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index c46965f..afdcf84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,16 +7,29 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] -## Changed +## [0.31.0] - 2023-07-06 + +### Changed + +- Eliminate size limit on caching requests https://github.com/OpenDataServices/lib-cove/pull/120 + +### Fixed + +- Fix crash when tmp directory is on different filesystem https://github.com/OpenDataServices/lib-cove/issues/84 +- Use sentence case consistently in validation error messages https://github.com/OpenDataServices/lib-cove/issues/28 +- Support jsonschema>=4.10 https://github.com/OpenDataServices/lib-cove/pull/118 ## [0.30.0] - 2023-03-10 +### Changed + - Allow jsonschema version 4. - Support arrays of strings that must be on a codelist https://github.com/ThreeSixtyGiving/dataquality/issues/80 - ## [0.29.0] - 2022-12-14 +### Changed + - Add `SchemaJsonMixin.process_codelists` (previously only existed in lib-cove-ocds, will be used by 360 CoVE) https://github.com/OpenDataServices/lib-cove/pull/109 ## [0.28.0] - 2022-11-18 @@ -34,7 +47,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [0.27.0] - 2021-11-02 -## Added +### Added - oneOf validator will read a new "oneOfEnumSelectorField" option in schema and use that to pick subschema. (Previously this worked for "statementType" only, for BODS) @@ -45,25 +58,25 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [0.26.1] - 2021-10-01 -## Changed +### Changed - Lock to jsonschema version 3 (we use internal tools that are not available in V4) ## [0.26.0] - 2021-09-15 -## Changed +### Changed - Various performance improvements https://github.com/open-contracting/lib-cove-oc4ids/issues/23 ## [0.25.0] - 2021-08-18 -## Added +### Added - Add a function to calculate field coverage https://github.com/open-contracting/cove-oc4ids/issues/98 ## [0.24.0] - 2021-05-20 -## Changed +### Changed - Update `unique_ids` override to support multiple ids. If you called `unique_ids` with `id_name="some_id"`, you now need to call `id_names=["some_id"]`. See this lib-cove-ocds PR as an example: https://github.com/open-contracting/lib-cove-ocds/pull/91/files @@ -77,7 +90,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Drop Python 3.5 support https://github.com/OpenDataServices/lib-cove/pull/81 -## CHanged +### Changed - Remove unused dependencies from setup.py https://github.com/OpenDataServices/lib-cove/pull/80 diff --git a/libcove/lib/common.py b/libcove/lib/common.py index dbec286..4465342 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -8,6 +8,7 @@ import numbers import os import re +import shutil from tempfile import NamedTemporaryFile from urllib.parse import urljoin, urlparse, urlsplit from urllib.request import urlopen @@ -720,11 +721,9 @@ def get_additional_codelist_values(schema_obj, json_data): path_string = "/".join(path_no_num) if path_string not in additional_codelist_values: - codelist_url = schema_obj.codelists + codelist codelist_amend_urls = [] if hasattr(schema_obj, "extended_codelist_urls"): - # Replace URL if this codelist is overridden by an extension. # Last one to be applied wins. if schema_obj.extended_codelist_urls.get(codelist): @@ -771,7 +770,6 @@ def get_additional_fields_info(json_data, schema_fields, context, fields_regex=F root_additional_fields = set() for field, field_info in fields_present.items(): - if field in schema_fields: continue if fields_regex and LANGUAGE_RE.search(field.split("/")[-1]): @@ -804,7 +802,6 @@ def get_counts_additional_fields( fields_regex=False, additional_fields_info=None, ): - if not additional_fields_info: schema_fields = schema_obj.get_pkg_schema_fields() additional_fields_info = get_additional_fields_info( @@ -850,6 +847,12 @@ def get_schema_validation_errors( schema_url=schema_obj.schema_host, ) + # Force jsonschema to use our validator. + # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + validator + ) + our_validator = validator( pkg_schema_obj, format_checker=format_checker, resolver=resolver ) @@ -887,7 +890,7 @@ def get_schema_validation_errors( header = e.path[-1] if isinstance(e.path[-1], int) and len(e.path) >= 2: # We're dealing with elements in an array of items at this point - pre_header = "Array Element " + pre_header = "Array element " header_extra = "{}/[number]".format(e.path[-2]) null_clause = "" @@ -1000,6 +1003,13 @@ def get_schema_validation_errors( validation_errors[ json.dumps(unique_validator_key, default=decimal_default) ].append(value) + + # Restore jsonschema's default validator, to not interfere with other software. + # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + jsonschema.validators.Draft4Validator + ) + return dict(validation_errors) @@ -1445,7 +1455,7 @@ def get_orgids_prefixes(orgids_url=None): # Use a tempfile and move to create new file here for atomicity with NamedTemporaryFile(mode="w", delete=False) as tmp: json.dump(org_id_file_contents, tmp, indent=2) - os.rename(tmp.name, local_org_ids_file) + shutil.move(tmp.name, local_org_ids_file) # Return either the original file data, if it was found to be fresh, or the new data, if we were able to retrieve it. return [org_list["code"] for org_list in org_id_file_contents["lists"]] diff --git a/libcove/lib/tools.py b/libcove/lib/tools.py index 96975ec..4a62b14 100644 --- a/libcove/lib/tools.py +++ b/libcove/lib/tools.py @@ -6,7 +6,7 @@ from .exceptions import UnrecognisedFileType -@lru_cache(maxsize=64) +@lru_cache(maxsize=None) def cached_get_request(url): return requests.get(url) diff --git a/setup.py b/setup.py index eb0ccdb..1606175 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="libcove", - version="0.30.0", + version="0.31.0", author="Open Data Services", author_email="code@opendataservices.coop", url="https://github.com/OpenDataServices/lib-cove", diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index 6296933..fd4ae36 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -333,7 +333,6 @@ def test_get_schema_deprecated_paths(): def test_schema_dict_fields_generator_release_schema_deprecated_fields(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -362,7 +361,6 @@ def test_schema_dict_fields_generator_release_schema_deprecated_fields(): def test_schema_dict_fields_generator_schema_with_list_and_oneof(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -396,7 +394,6 @@ def test_schema_dict_fields_generator_schema_with_list_and_oneof(): def test_fields_present_generator_tenders_releases_2_releases(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -449,7 +446,6 @@ def test_fields_present_generator_tenders_releases_2_releases(): def test_fields_present_generator_data_root_is_list(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -501,7 +497,6 @@ def test_fields_present_generator_data_root_is_list(): def test_get_additional_fields_info(): - simple_data = { "non_additional_field": "a", "non_additional_list": [1, 2], @@ -1227,7 +1222,6 @@ def test_get_field_coverage_oc4ids(): ), ) def test_oneOfEnumSelectorField(data, count, errors): - with open(common_fixtures("schema_with_one_of_enum_selector_field.json")) as fp: schema = json.load(fp) diff --git a/tests/lib/test_converters.py b/tests/lib/test_converters.py index 11a8669..4660301 100644 --- a/tests/lib/test_converters.py +++ b/tests/lib/test_converters.py @@ -8,7 +8,6 @@ def test_convert_json_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -55,7 +54,6 @@ def test_convert_json_1(): def test_convert_activity_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -110,7 +108,6 @@ def test_convert_activity_xml_1(): def test_convert_org_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -166,7 +163,6 @@ def test_convert_org_xml_1(): def test_convert_json_root_is_list_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -214,7 +210,6 @@ def test_convert_json_root_is_list_1(): def test_convert_csv_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() )