# frozen_string_literal: true

# File: lib/geo_combine/migrators/v1_aardvark_migrator.rb
# Loaded from lib/geo_combine.rb (under its "Migrators" section) with
# `require 'geo_combine/migrators/v1_aardvark_migrator'`.

module GeoCombine
  module Migrators
    # TODO: WARNING! This class is not fully implemented and should not be used in
    # production. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
    # for remaining work.
    #
    # Migrates a record from the v1 schema to the Aardvark schema.
    #
    # At present the migration only renames keys (see SCHEMA_FIELD_MAP) and
    # stamps the schema version. Values are passed through untouched, so a field
    # annotated "single to multi-valued" below still carries whatever value the
    # v1 record held; it is not wrapped in an Array.
    class V1AardvarkMigrator
      # v1 field name => Aardvark field name. Keys that do not appear here are
      # carried over to the migrated record under their original name.
      SCHEMA_FIELD_MAP = {
        'dc_title_s' => 'dct_title_s', # new namespace
        'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
        'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_language_sm' => 'dct_language_sm', # new namespace
        'dc_creator_sm' => 'dct_creator_sm', # new namespace
        'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
        'dct_provenance_s' => 'schema_provider_s', # new URI name
        'dc_subject_sm' => 'dct_subject_sm', # new namespace
        'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
        'dc_source_sm' => 'dct_source_sm', # new namespace
        'dc_rights_s' => 'dct_accessRights_s', # new URI name
        'dc_format_s' => 'dct_format_s', # new namespace
        'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
        'layer_slug_s' => 'id', # new URI name
        'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
        'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
        'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
        'suppressed_b' => 'gbl_suppressed_b' # new namespace
      }.freeze

      # @return [Hash] the v1 record this migrator was built with
      attr_reader :v1_hash

      # @param v1_hash [Hash] parsed json in the v1 schema
      def initialize(v1_hash:)
        @v1_hash = v1_hash
      end

      # Performs the migration.
      #
      # The version stamp is applied after the key rename, so a value carried
      # over from 'geoblacklight_version' is always replaced by 'Aardvark'.
      #
      # @return [Hash] a new hash with Aardvark keys and
      #   'gbl_mdVersion_s' set to 'Aardvark'; +v1_hash+ itself is not modified
      def run
        convert_keys.merge('gbl_mdVersion_s' => 'Aardvark')
      end

      # Renames every key found in SCHEMA_FIELD_MAP and keeps all other keys
      # as they are. Values are not touched.
      #
      # @return [Hash] a new hash; +v1_hash+ itself is not modified
      def convert_keys
        v1_hash.transform_keys { |key| SCHEMA_FIELD_MAP.fetch(key, key) }
      end
    end
  end
end
These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.", + "dct_accessRights_s":"Public", + "schema_provider_s":"Stanford", + "dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}", + "gbl_wxsIdentifier_s":"druid:cz128vq0535", + "id":"stanford-cz128vq0535", + "layer_geom_type_s":"Polygon", + "gbl_mdModified_dt":"2015-01-13T18:46:38Z", + "dct_format_s":"Shapefile", + "dct_language_sm":"English", + "dc_type_s":"Dataset", + "dct_publisher_sm":"Uganda Bureau of Statistics", + "dct_creator_sm":[ + "Uganda Bureau of Statistics" + ], + "dct_subject_sm":[ + "Poverty", + "Statistics" + ], + "dct_issued_s":"2005", + "dct_temporal_sm":[ + "2005" + ], + "dct_spatial_sm":[ + "Uganda" + ], + "solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)", + "gbl_indexYear_im":2005 +} diff --git a/spec/fixtures/json_docs.rb b/spec/fixtures/json_docs.rb index d56f23a..c6a2ed3 100644 --- a/spec/fixtures/json_docs.rb +++ b/spec/fixtures/json_docs.rb @@ -15,6 +15,12 @@ def full_geoblacklight File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json')) end + ## + # full_geoblacklight fixture converted to the aardvark schema + 
# File: spec/fixtures/json_docs.rb (fixture reader added next to
# full_geoblacklight)

# Reads the Aardvark version of the full_geoblacklight fixture.
#
# @return [String] raw contents of docs/full_geoblacklight_aardvark.json,
#   resolved relative to the directory of this file
def full_geoblacklight_aardvark
  fixture_path = File.join(File.dirname(__FILE__), './docs/full_geoblacklight_aardvark.json')
  File.read(fixture_path)
end

# File: spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
  include JsonDocs

  describe '#run' do
    it 'migrates keys' do
      # TODO: Note that this fixture has not yet been fully converted to
      # aardvark. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
      # for remaining work.
      v1_record = JSON.parse(full_geoblacklight)
      aardvark_record = JSON.parse(full_geoblacklight_aardvark)

      migrated = described_class.new(v1_hash: v1_record).run

      expect(migrated).to eq(aardvark_record)
    end

    context 'when the given record is already in aardvark schema' do
      # Pending: declared without a body, so nothing is asserted yet.
      xit 'returns the record unchanged'
    end
  end
end