From aadfc78c2505d5096e6dfbe9a9056ceccb237d9e Mon Sep 17 00:00:00 2001 From: Anna Headley Date: Tue, 22 Feb 2022 12:18:41 -0500 Subject: [PATCH 1/3] Sketch initial migrator for v1 to aardvark Note that the fixture called aardvark hasn't actually been converted yet; it is just a straight-up copy of the full_geoblacklight fixture refs #121 --- lib/geo_combine.rb | 3 ++ .../migrators/v1_aardvark_migrator.rb | 19 +++++++++++ .../docs/full_geoblacklight_aardvark.json | 33 +++++++++++++++++++ spec/fixtures/json_docs.rb | 6 ++++ .../migrators/v1_aardvark_migrator_spec.rb | 15 +++++++++ 5 files changed, 76 insertions(+) create mode 100644 lib/geo_combine/migrators/v1_aardvark_migrator.rb create mode 100644 spec/fixtures/docs/full_geoblacklight_aardvark.json create mode 100644 spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb diff --git a/lib/geo_combine.rb b/lib/geo_combine.rb index 1a934e1..64bb0dc 100644 --- a/lib/geo_combine.rb +++ b/lib/geo_combine.rb @@ -72,6 +72,9 @@ def to_html # Require harvesting/indexing files require 'geo_combine/geo_blacklight_harvester' +# Migrators +require 'geo_combine/migrators/v1_aardvark_migrator' + # Require gem files require 'geo_combine/version' require 'geo_combine/railtie' if defined?(Rails) diff --git a/lib/geo_combine/migrators/v1_aardvark_migrator.rb b/lib/geo_combine/migrators/v1_aardvark_migrator.rb new file mode 100644 index 0000000..3430530 --- /dev/null +++ b/lib/geo_combine/migrators/v1_aardvark_migrator.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module GeoCombine + module Migrators + # migrates the v1 schema to the aardvark schema + class V1AardvarkMigrator + attr_reader :v1_hash + + # @param v1_hash [Hash] parsed json in the v1 schema + def initialize(v1_hash:) + @v1_hash = v1_hash + end + + def run + v1_hash + end + end + end +end diff --git a/spec/fixtures/docs/full_geoblacklight_aardvark.json b/spec/fixtures/docs/full_geoblacklight_aardvark.json new file mode 100644 index 0000000..e58e835 --- /dev/null 
+++ b/spec/fixtures/docs/full_geoblacklight_aardvark.json @@ -0,0 +1,33 @@ +{ + "geoblacklight_version":"1.0", + "dc_identifier_s":"http://purl.stanford.edu/cz128vq0535", + "dc_title_s":"2005 Rural Poverty GIS Database: Uganda", + "dc_description_s":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.", + "dc_rights_s":"Public", + "dct_provenance_s":"Stanford", + "dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}", + "layer_id_s":"druid:cz128vq0535", + "layer_slug_s":"stanford-cz128vq0535", + "layer_geom_type_s":"Polygon", + "layer_modified_dt":"2015-01-13T18:46:38Z", + "dc_format_s":"Shapefile", + "dc_language_s":"English", + "dc_type_s":"Dataset", + "dc_publisher_s":"Uganda Bureau of Statistics", + "dc_creator_sm":[ + "Uganda Bureau of Statistics" + ], + "dc_subject_sm":[ + "Poverty", + "Statistics" + ], + "dct_issued_s":"2005", + "dct_temporal_sm":[ + "2005" + ], + "dct_spatial_sm":[ + "Uganda" + ], + "solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)", + "solr_year_i":2005 +} diff --git a/spec/fixtures/json_docs.rb 
b/spec/fixtures/json_docs.rb index d56f23a..c6a2ed3 100644 --- a/spec/fixtures/json_docs.rb +++ b/spec/fixtures/json_docs.rb @@ -15,6 +15,12 @@ def full_geoblacklight File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json')) end + ## + # full_geoblacklight fixture converted to the aardvark schema + def full_geoblacklight_aardvark + File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight_aardvark.json')) + end + ## # A sample Esri OpenData metadata record def esri_opendata_metadata diff --git a/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb new file mode 100644 index 0000000..d23cde9 --- /dev/null +++ b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do + include JsonDocs + + describe '#run' do + it 'migrates keys' do + input_hash = JSON.parse(full_geoblacklight) + expected_output = JSON.parse(full_geoblacklight_aardvark) + expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output) + end + end +end From b30236ba5017988201336951ebc5efd8b857633e Mon Sep 17 00:00:00 2001 From: Anna Headley Date: Tue, 22 Feb 2022 13:26:32 -0500 Subject: [PATCH 2/3] Convert all the keys, and the metadata version value --- .../migrators/v1_aardvark_migrator.rb | 34 ++++++++++++++++++- .../docs/full_geoblacklight_aardvark.json | 30 ++++++++-------- .../migrators/v1_aardvark_migrator_spec.rb | 3 ++ 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/lib/geo_combine/migrators/v1_aardvark_migrator.rb b/lib/geo_combine/migrators/v1_aardvark_migrator.rb index 3430530..fc3507c 100644 --- a/lib/geo_combine/migrators/v1_aardvark_migrator.rb +++ b/lib/geo_combine/migrators/v1_aardvark_migrator.rb @@ -11,9 +11,41 @@ def initialize(v1_hash:) @v1_hash = v1_hash end + # TODO: elements without a crosswalk: + # 
https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/aboutAardvark.md#elements-without-a-crosswalk def run - v1_hash + # TODO: return unless it's version 1 + v2_hash = convert_keys + v2_hash['gbl_mdVersion_s'] = "Aardvark" + v2_hash end + + def convert_keys + v1_hash.transform_keys do |k| + SCHEMA_FIELD_MAP[k] || k + end + end + + SCHEMA_FIELD_MAP = { + 'dc_title_s' => 'dct_title_s', # new namespace + 'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued + 'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued + 'dc_language_sm' => 'dct_language_sm', # new namespace; single to multi-valued + 'dc_creator_sm' => 'dct_creator_sm', # new namespace + 'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued + 'dct_provenance_s' => 'schema_provider_s', # new URI name + 'dc_subject_sm' => 'dct_subject_sm', # new namespace + 'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued + 'dc_source_sm' => 'dct_source_sm', # new namespace + 'dc_rights_s' => 'dct_accessRights_s', # new URI name + 'dc_format_s' => 'dct_format_s', # new namespace + 'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name + 'layer_slug_s' => 'id', # new URI name + 'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued + 'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name + 'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name + 'suppressed_b' => 'gbl_suppressed_b' # new namespace + }.freeze end end end diff --git a/spec/fixtures/docs/full_geoblacklight_aardvark.json b/spec/fixtures/docs/full_geoblacklight_aardvark.json index e58e835..00debf4 100644 --- a/spec/fixtures/docs/full_geoblacklight_aardvark.json +++ b/spec/fixtures/docs/full_geoblacklight_aardvark.json @@ -1,23 +1,23 @@ { - "geoblacklight_version":"1.0", - "dc_identifier_s":"http://purl.stanford.edu/cz128vq0535", - "dc_title_s":"2005 Rural Poverty GIS Database: Uganda", - 
"dc_description_s":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.", - "dc_rights_s":"Public", - "dct_provenance_s":"Stanford", + "gbl_mdVersion_s":"Aardvark", + "dct_identifier_sm":"http://purl.stanford.edu/cz128vq0535", + "dct_title_s":"2005 Rural Poverty GIS Database: Uganda", + "dct_description_sm":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.", + "dct_accessRights_s":"Public", + "schema_provider_s":"Stanford", "dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}", - "layer_id_s":"druid:cz128vq0535", - "layer_slug_s":"stanford-cz128vq0535", + "gbl_wxsIdentifier_s":"druid:cz128vq0535", + "id":"stanford-cz128vq0535", "layer_geom_type_s":"Polygon", - "layer_modified_dt":"2015-01-13T18:46:38Z", - "dc_format_s":"Shapefile", - "dc_language_s":"English", + 
"gbl_mdModified_dt":"2015-01-13T18:46:38Z", + "dct_format_s":"Shapefile", + "dct_language_sm":"English", "dc_type_s":"Dataset", - "dc_publisher_s":"Uganda Bureau of Statistics", - "dc_creator_sm":[ + "dct_publisher_sm":"Uganda Bureau of Statistics", + "dct_creator_sm":[ "Uganda Bureau of Statistics" ], - "dc_subject_sm":[ + "dct_subject_sm":[ "Poverty", "Statistics" ], @@ -29,5 +29,5 @@ "Uganda" ], "solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)", - "solr_year_i":2005 + "gbl_indexYear_im":2005 } diff --git a/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb index d23cde9..8cc2e09 100644 --- a/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +++ b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb @@ -5,6 +5,9 @@ RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do include JsonDocs + # TODO: the input fixture doesn't have: + # - dc_source_sm + # - suppressed_b describe '#run' do it 'migrates keys' do input_hash = JSON.parse(full_geoblacklight) From c0ee7e54444c4d5b7d64d2fdbed50d66b1cf72c2 Mon Sep 17 00:00:00 2001 From: Anna Headley Date: Fri, 25 Feb 2022 16:50:43 -0500 Subject: [PATCH 3/3] Move TODOs to the issue --- lib/geo_combine/migrators/v1_aardvark_migrator.rb | 9 +++++---- .../geo_combine/migrators/v1_aardvark_migrator_spec.rb | 10 +++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/lib/geo_combine/migrators/v1_aardvark_migrator.rb b/lib/geo_combine/migrators/v1_aardvark_migrator.rb index fc3507c..2f07a0e 100644 --- a/lib/geo_combine/migrators/v1_aardvark_migrator.rb +++ b/lib/geo_combine/migrators/v1_aardvark_migrator.rb @@ -2,6 +2,10 @@ module GeoCombine module Migrators + # TODO: WARNING! This class is not fully implemented and should not be used in + # production. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121 + # for remaining work. 
+ # # migrates the v1 schema to the aardvark schema class V1AardvarkMigrator attr_reader :v1_hash @@ -11,12 +15,9 @@ def initialize(v1_hash:) @v1_hash = v1_hash end - # TODO: elements without a crosswalk: - # https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/aboutAardvark.md#elements-without-a-crosswalk def run - # TODO: return unless it's version 1 v2_hash = convert_keys - v2_hash['gbl_mdVersion_s'] = "Aardvark" + v2_hash['gbl_mdVersion_s'] = 'Aardvark' v2_hash end diff --git a/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb index 8cc2e09..ed81929 100644 --- a/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +++ b/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb @@ -5,14 +5,18 @@ RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do include JsonDocs - # TODO: the input fixture doesn't have: - # - dc_source_sm - # - suppressed_b describe '#run' do it 'migrates keys' do input_hash = JSON.parse(full_geoblacklight) + # TODO: Note that this fixture has not yet been fully converted to + # aardvark. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121 + # for remaining work. expected_output = JSON.parse(full_geoblacklight_aardvark) expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output) end + + context 'when the given record is already in aardvark schema' do + xit 'returns the record unchanged' + end end end