From a77f32e3ab0f8505f89537712c5064789799dd6f Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 27 Aug 2024 17:02:23 +0200 Subject: [PATCH] Consolidate schema files as they are identical --- README.md | 4 +- ckanext/dcat/schemas/dcat_ap_3_full.yaml | 384 ------------------ ...cat_ap_2.1_full.yaml => dcat_ap_full.yaml} | 0 ...ommended.yaml => dcat_ap_recommended.yaml} | 0 .../dcat_ap_2/test_scheming_support.py | 8 +- .../test_euro_dcatap_3_profile_parse.py | 2 +- .../test_euro_dcatap_3_profile_serialize.py | 2 +- ckanext/dcat/tests/shacl/test_shacl.py | 8 +- 8 files changed, 12 insertions(+), 396 deletions(-) delete mode 100644 ckanext/dcat/schemas/dcat_ap_3_full.yaml rename ckanext/dcat/schemas/{dcat_ap_2.1_full.yaml => dcat_ap_full.yaml} (100%) rename ckanext/dcat/schemas/{dcat_ap_2.1_recommended.yaml => dcat_ap_recommended.yaml} (100%) diff --git a/README.md b/README.md index cd1b51e1..83c8baa6 100644 --- a/README.md +++ b/README.md @@ -117,8 +117,8 @@ The extension includes ready to use [ckanext-scheming](https://github.com/ckan/c There are the following schemas currently included with the extension: -* *dcat_ap_2.1_recommended.yaml*: Includes the recommended properties for `dcat:Dataset` and `dcat:Distribution` according to the [DCAT 2.1](https://semiceu.github.io/DCAT-AP/releases/2.1.1/) specification. -* *dcat_ap_2.1_full.yaml*: Includes most of the properties defined for `dcat:Dataset` and `dcat:Distribution` in the [DCAT 2.1](https://semiceu.github.io/DCAT-AP/releases/2.1.1/) specification. +* *dcat_ap_recommended.yaml*: Includes the recommended properties for `dcat:Dataset` and `dcat:Distribution` according to the DCAT AP specification. You can use this schema with the `euro_dcat_ap_2` (+ `euro_dcat_scheming`) and `euro_dcat_ap_3` profiles. 
+* *dcat_ap_full.yaml*: Includes most of the properties defined for `dcat:Dataset` and `dcat:Distribution` in the [DCAT AP 2.1](https://semiceu.github.io/DCAT-AP/releases/2.1.1/) and [DCAT AP v3](https://semiceu.github.io/DCAT-AP/releases/3.0.0/) specifications. You can use this schema with the `euro_dcat_ap_2` (+ `euro_dcat_scheming`) and `euro_dcat_ap_3` profiles. Most sites will want to use these as a base to create their own custom schema to address their own requirements, perhaps alongside a [custom profile](#writing-custom-profiles). Of course site maintainers can add or remove schema fields, as well as change the existing validators. diff --git a/ckanext/dcat/schemas/dcat_ap_3_full.yaml b/ckanext/dcat/schemas/dcat_ap_3_full.yaml deleted file mode 100644 index 8f9f4afc..00000000 --- a/ckanext/dcat/schemas/dcat_ap_3_full.yaml +++ /dev/null @@ -1,384 +0,0 @@ -scheming_version: 2 -dataset_type: dataset -about: Full DCAT AP 2.1 schema -about_url: http://github.com/ckan/ckanext-dcat - -dataset_fields: - -- field_name: title - label: Title - preset: title - required: true - help_text: A descriptive title for the dataset. - -- field_name: name - label: URL - preset: dataset_slug - form_placeholder: eg. my-dataset - -- field_name: notes - label: Description - required: true - form_snippet: markdown.html - help_text: A free-text account of the dataset. - -- field_name: tag_string - label: Keywords - preset: tag_string_autocomplete - form_placeholder: eg. economy, mental health, government - help_text: Keywords or tags describing the dataset. Use commas to separate multiple values. - -- field_name: contact - label: Contact points - repeating_label: Contact point - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: email - label: Email - display_snippet: email.html - help_text: Contact information for enquiries about the dataset. 
- -- field_name: publisher - label: Publisher - repeating_label: Publisher - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: url - label: URL - display_snippet: link.html - - - field_name: type - label: Type - help_text: Entity responsible for making the dataset available. - -- field_name: license_id - label: License - form_snippet: license.html - help_text: License definitions and additional information can be found at http://opendefinition.org/. - -- field_name: owner_org - label: Organization - preset: dataset_organization - help_text: The CKAN organization the dataset belongs to. - -- field_name: url - label: Landing page - form_placeholder: http://example.com/dataset.json - display_snippet: link.html - help_text: Web page that can be navigated to gain access to the dataset, its distributions and/or additional information. - - # Note: this will fall back to metadata_created if not present -- field_name: issued - label: Release date - preset: dcat_date - help_text: Date of publication of the dataset. - - # Note: this will fall back to metadata_modified if not present -- field_name: modified - label: Modification date - preset: dcat_date - help_text: Most recent date on which the dataset was changed, updated or modified. - -- field_name: version - label: Version - validators: ignore_missing unicode_safe package_version_validator - help_text: Version number or other version designation of the dataset. - -- field_name: version_notes - label: Version notes - validators: ignore_missing unicode_safe - form_snippet: markdown.html - display_snippet: markdown.html - help_text: A description of the differences between this version and a previous version of the dataset. 
- - # Note: CKAN will generate a unique identifier for each dataset -- field_name: identifier - label: Identifier - help_text: A unique identifier of the dataset. - -- field_name: frequency - label: Frequency - help_text: The frequency at which dataset is published. - -- field_name: provenance - label: Provenance - form_snippet: markdown.html - display_snippet: markdown.html - help_text: A statement about the lineage of the dataset. - -- field_name: dcat_type - label: Type - help_text: The type of the dataset. - # TODO: controlled vocabulary? - -- field_name: temporal_coverage - label: Temporal coverage - repeating_subfields: - - - field_name: start - label: Start - preset: dcat_date - - - field_name: end - label: End - preset: dcat_date - help_text: The temporal period or periods the dataset covers. - -- field_name: temporal_resolution - label: Temporal resolution - help_text: Minimum time period resolvable in the dataset. - -- field_name: spatial_coverage - label: Spatial coverage - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: text - label: Label - - - field_name: geom - label: Geometry - - - field_name: bbox - label: Bounding Box - - - field_name: centroid - label: Centroid - help_text: A geographic region that is covered by the dataset. - -- field_name: spatial_resolution_in_meters - label: Spatial resolution in meters - help_text: Minimum spatial separation resolvable in a dataset, measured in meters. - -- field_name: access_rights - label: Access rights - validators: ignore_missing unicode_safe - form_snippet: markdown.html - display_snippet: markdown.html - help_text: Information that indicates whether the dataset is Open Data, has access restrictions or is not public. - -- field_name: alternate_identifier - label: Other identifier - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: This property refers to a secondary identifier of the dataset, such as MAST/ADS, DataCite, DOI, etc. 
- -- field_name: theme - label: Theme - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A category of the dataset. A Dataset may be associated with multiple themes. - -- field_name: language - label: Language - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Language or languages of the dataset. - # TODO: language form snippet / validator / graph - -- field_name: documentation - label: Documentation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A page or document about this dataset. - -- field_name: conforms_to - label: Conforms to - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: An implementing rule or other specification that the dataset follows. - -- field_name: is_referenced_by - label: Is referenced by - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A related resource, such as a publication, that references, cites, or otherwise points to the dataset. - -- field_name: applicable_legislation - label: Applicable legislation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legislation that mandates the creation or management of the dataset. - -#- field_name: hvd_category -# label: HVD Category -# preset: multiple_text -# validators: ignore_missing scheming_multiple_text -# TODO: implement separately as part of wider HVD support - -# Note: if not provided, this will be autogenerated -- field_name: uri - label: URI - help_text: An URI for this dataset (if not provided it will be autogenerated). - -# TODO: relation-based properties are not yet included (e.g. is_version_of, source, sample, etc) -# -resource_fields: - -- field_name: url - label: URL - preset: resource_url_upload - -- field_name: name - label: Name - form_placeholder: - help_text: A descriptive title for the resource. 
- -- field_name: description - label: Description - form_snippet: markdown.html - help_text: A free-text account of the resource. - -- field_name: format - label: Format - preset: resource_format_autocomplete - help_text: File format. If not provided it will be guessed. - -- field_name: mimetype - label: Media type - validators: if_empty_guess_format ignore_missing unicode_safe - help_text: Media type for this format. If not provided it will be guessed. - -- field_name: compress_format - label: Compress format - help_text: The format of the file in which the data is contained in a compressed form. - -- field_name: package_format - label: Package format - help_text: The format of the file in which one or more data files are grouped together. - -- field_name: size - label: Size - validators: ignore_missing int_validator - form_snippet: number.html - display_snippet: file_size.html - help_text: File size in bytes - -- field_name: hash - label: Hash - help_text: Checksum of the downloaded file. - -- field_name: hash_algorithm - label: Hash Algorithm - help_text: Algorithm used to calculate to checksum. - -- field_name: rights - label: Rights - form_snippet: markdown.html - display_snippet: markdown.html - help_text: Some statement about the rights associated with the resource. - -- field_name: availability - label: Availability - help_text: Indicates how long it is planned to keep the resource available. - -- field_name: status - label: Status - preset: select - choices: - - value: http://purl.org/adms/status/Completed - label: Completed - - value: http://purl.org/adms/status/UnderDevelopment - label: Under Development - - value: http://purl.org/adms/status/Deprecated - label: Deprecated - - value: http://purl.org/adms/status/Withdrawn - label: Withdrawn - help_text: The status of the resource in the context of maturity lifecycle. - -- field_name: license - label: License - help_text: License in which the resource is made available. 
If not provided will be inherited from the dataset. - - # Note: this falls back to the standard resource url field -- field_name: access_url - label: Access URL - help_text: URL that gives access to the dataset (defaults to the standard resource URL). - - # Note: this falls back to the standard resource url field -- field_name: download_url - label: Download URL - help_text: URL that provides a direct link to a downloadable file (defaults to the standard resource URL). - -- field_name: issued - label: Release date - preset: dcat_date - help_text: Date of publication of the resource. - -- field_name: modified - label: Modification date - preset: dcat_date - help_text: Most recent date on which the resource was changed, updated or modified. - -- field_name: language - label: Language - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Language or languages of the resource. - -- field_name: documentation - label: Documentation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A page or document about this resource. - -- field_name: conforms_to - label: Conforms to - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: An established schema to which the described resource conforms. - -- field_name: applicable_legislation - label: Applicable legislation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legislation that mandates the creation or management of the resource. 
- -- field_name: access_services - label: Access services - repeating_label: Access service - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: title - label: Title - - - field_name: endpoint_description - label: Endpoint description - - - field_name: endpoint_url - label: Endpoint URL - preset: multiple_text - - - field_name: serves_dataset - label: Serves dataset - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - help_text: A data service that gives access to the resource. - - # Note: if not provided, this will be autogenerated -- field_name: uri - label: URI - help_text: An URI for this resource (if not provided it will be autogenerated). diff --git a/ckanext/dcat/schemas/dcat_ap_2.1_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml similarity index 100% rename from ckanext/dcat/schemas/dcat_ap_2.1_full.yaml rename to ckanext/dcat/schemas/dcat_ap_full.yaml diff --git a/ckanext/dcat/schemas/dcat_ap_2.1_recommended.yaml b/ckanext/dcat/schemas/dcat_ap_recommended.yaml similarity index 100% rename from ckanext/dcat/schemas/dcat_ap_2.1_recommended.yaml rename to ckanext/dcat/schemas/dcat_ap_recommended.yaml diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index de8a8a88..bec6d911 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -32,7 +32,7 @@ @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", @@ -673,7 +673,7 @@ def test_dcat_date(self): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat 
scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", @@ -705,7 +705,7 @@ def test_mimetype_is_guessed(self): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", @@ -860,7 +860,7 @@ def test_e2e_dcat_to_ckan(self): @pytest.mark.usefixtures("with_plugins", "clean_db", "clean_index") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_parse.py index 286a9692..e887a24d 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_parse.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_parse.py @@ -9,7 +9,7 @@ @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_3_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py index 382d6c76..e0001526 100644 --- 
a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py @@ -32,7 +32,7 @@ class TestEuroDCATAP3ProfileSerializeDataset(BaseSerializeTest): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_3_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", diff --git a/ckanext/dcat/tests/shacl/test_shacl.py b/ckanext/dcat/tests/shacl/test_shacl.py index af027491..7dbd8a5b 100644 --- a/ckanext/dcat/tests/shacl/test_shacl.py +++ b/ckanext/dcat/tests/shacl/test_shacl.py @@ -49,7 +49,7 @@ def _results_count(results_graph): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", @@ -73,7 +73,7 @@ def test_validate_dcat_ap_2_graph_shapes(): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", @@ -127,7 +127,7 @@ def test_validate_dcat_ap_2_legacy_graph_shapes_recommended(): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( 
"scheming.presets", @@ -167,7 +167,7 @@ def test_validate_dcat_ap_2_graph_shapes_range(): @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( - "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_3_full.yaml" + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets",