Skip to content

Commit

Permalink
feat: support loading Shapefiles for XML schemas
Browse files Browse the repository at this point in the history
Includes:

- auto-detect types not following ShapeSchemaReader conventions
- load data even if geometry property or property namespaces don't match
- don't add null for properties that are not nillable

ING-4416
  • Loading branch information
stempler committed Aug 7, 2024
1 parent 5f8fbd7 commit 0f64ad3
Show file tree
Hide file tree
Showing 18 changed files with 588 additions and 121 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2024 wetransform GmbH
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* wetransform GmbH <http://www.wetransform.to>
*/

package eu.esdihumboldt.hale.common.align.helper

import java.util.function.Function

import javax.xml.namespace.QName

import eu.esdihumboldt.hale.common.align.model.AlignmentUtil
import eu.esdihumboldt.hale.common.align.model.EntityDefinition
import eu.esdihumboldt.hale.common.core.report.SimpleLog
import eu.esdihumboldt.hale.common.schema.SchemaSpaceID
import eu.esdihumboldt.hale.common.schema.model.PropertyDefinition
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition
import eu.esdihumboldt.hale.common.schema.model.constraint.type.GeometryType

/**
 * Helper for identifying geometry properties.
 *
 * @author Simon Templer
 */
class GeometryPropertyFinder {

	/**
	 * Determine the name of the main (top-level) property of a type that is
	 * or contains a geometry.
	 *
	 * Candidates are searched with an {@link EntityFinder} starting from the
	 * properties returned by <code>propertyFinder</code>. A property counts as
	 * geometry property if its property type's {@link GeometryType} constraint
	 * reports a geometry. From the main property names of all candidates the
	 * result is selected in this order:
	 * <ol>
	 * <li>the name equal to <code>preferredName</code> without namespace</li>
	 * <li>the first name whose local part equals <code>preferredName</code></li>
	 * <li>the first name found</li>
	 * </ol>
	 * An info message is logged for the last two cases only.
	 *
	 * @param type the type to find the geometry property for
	 * @param propertyFinder function yielding the properties of the type that
	 *            serve as starting points for the search
	 * @param preferredName the preferred local name of the geometry property,
	 *            may be <code>null</code> if there is no preference
	 * @param log the log the selected property is reported to
	 * @return the name of the selected main property or <code>null</code> if
	 *         no geometry property candidate was found
	 */
	static QName findGeometryProperty(TypeDefinition type, Function<TypeDefinition, Collection<? extends PropertyDefinition>> propertyFinder, String preferredName, SimpleLog log) {
		// allow for geometry property types with choices
		// NOTE(review): presumably the number of levels EntityFinder descends - confirm against EntityFinder
		int checkLevels = 3

		// create finder for geometry properties
		EntityFinder finder = new EntityFinder({ EntityDefinition entity ->
			// determine if the property classifies as a geometry property
			def definition = entity.getDefinition()
			if (definition instanceof PropertyDefinition) {
				def propertyType = ((PropertyDefinition) definition).getPropertyType()
				return propertyType.getConstraint(GeometryType).isGeometry()
			}

			false
		}, checkLevels)

		// every property of the type is a starting point for the search
		def parents = propertyFinder.apply(type).collect { PropertyDefinition p ->
			AlignmentUtil.createEntityFromDefinitions(type, [p], SchemaSpaceID.SOURCE, null)
		}

		def candidates = finder.find(parents)

		if (candidates.empty) {
			return null
		}

		// select candidate

		// extract the main property name of each candidate (first path element);
		// order matters because of traversal order for finding the candidates.
		// The index must be applied per candidate: "candidates*.propertyPath[0]"
		// would select the complete path of the first candidate only.
		Set<QName> names = new LinkedHashSet<QName>(candidates.collect { it.propertyPath[0].child.name })

		QName preferred = null
		if (preferredName != null) {
			// prefer geometry column name w/o namespace
			preferred = new QName(preferredName)
			if (names.contains(preferred)) {
				return preferred
			}

			// otherwise prefer any with geometry column name
			preferred = names.find { preferredName == it.localPart }
		}

		if (preferred == null) {
			// otherwise use first one
			preferred = names.iterator().next()
		}

		log.info("Identified property $preferred as geometry property for type ${type.name.localPart}")

		preferred
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package eu.esdihumboldt.hale.common.instance.model;

import java.util.ArrayList;
import java.util.List;

/**
* Basic instance collection interface.
*
Expand Down Expand Up @@ -73,4 +76,22 @@ public interface InstanceCollection extends InstanceResolver {
// TODO what else is needed?
// public InstanceCollection[] partition(...);

/**
 * Collects every instance of this collection into a list.
 *
 * All instances are held in the returned list at once, so this should only
 * be used where it is clear that the data is not too big.
 *
 * @return a list containing all instances
 */
default List<Instance> toList() {
	final List<Instance> instances = new ArrayList<>();
	// the iterator is a resource and is closed when the block is left
	try (ResourceIterator<Instance> it = iterator()) {
		for (; it.hasNext();) {
			instances.add(it.next());
		}
	}
	return instances;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,26 @@
package eu.esdihumboldt.hale.io.geopackage.internal

import java.sql.SQLException
import java.util.function.Function

import javax.xml.namespace.QName

import org.locationtech.jts.io.WKBReader

import eu.esdihumboldt.hale.common.align.helper.EntityFinder
import eu.esdihumboldt.hale.common.align.model.AlignmentUtil
import eu.esdihumboldt.hale.common.align.model.EntityDefinition
import eu.esdihumboldt.hale.common.align.helper.GeometryPropertyFinder
import eu.esdihumboldt.hale.common.core.report.SimpleLog
import eu.esdihumboldt.hale.common.instance.geometry.DefaultGeometryProperty
import eu.esdihumboldt.hale.common.instance.geometry.impl.CodeDefinition
import eu.esdihumboldt.hale.common.instance.geometry.impl.WKTDefinition
import eu.esdihumboldt.hale.common.instance.groovy.InstanceBuilder
import eu.esdihumboldt.hale.common.instance.model.Instance
import eu.esdihumboldt.hale.common.schema.SchemaSpaceID
import eu.esdihumboldt.hale.common.schema.geometry.CRSDefinition
import eu.esdihumboldt.hale.common.schema.model.DefinitionUtil
import eu.esdihumboldt.hale.common.schema.model.PropertyDefinition
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition
import eu.esdihumboldt.hale.common.schema.model.constraint.property.Cardinality
import eu.esdihumboldt.hale.common.schema.model.constraint.property.NillableFlag
import eu.esdihumboldt.hale.common.schema.model.constraint.type.GeometryMetadata
import eu.esdihumboldt.hale.common.schema.model.constraint.type.GeometryType
import eu.esdihumboldt.hale.io.geopackage.GeopackageSchemaBuilder
import groovy.transform.CompileStatic
import mil.nga.geopackage.core.srs.SpatialReferenceSystem
Expand Down Expand Up @@ -125,57 +122,10 @@ class TableInstanceBuilder {

private QName getGeometryProperty(TypeDefinition type, final String geometryColumn) {
return cachedGeometryProperty.computeIfAbsent(type) { TypeDefinition t ->
// allow for geometry property types with choices
int checkLevels = 3

// create finder for geometry properties
EntityFinder finder = new EntityFinder({ EntityDefinition entity ->
// determine if the property classifies as
if (entity.getDefinition() instanceof PropertyDefinition) {
def propertyType = ((PropertyDefinition) entity.getDefinition()).getPropertyType()

boolean isGeometry = propertyType.getConstraint(GeometryType).isGeometry()
if (isGeometry) {
return true
}
}

false
}, checkLevels)

def parents = getProperties(type).collect { PropertyDefinition p ->
AlignmentUtil.createEntityFromDefinitions(type, [p], SchemaSpaceID.SOURCE, null)
}

def candidates = finder.find(parents)

if (candidates.empty) {
null
}
else {
// select candidate

// extract main property names; order matters because of traversal order for finding the candidates
Set<QName> names = new LinkedHashSet(candidates*.propertyPath[0]*.child*.name)

// prefer geometry column name w/o namespace
def preferred = new QName(geometryColumn)
if (names.contains(preferred)) {
return preferred
}

// otherwise prefer any with geometry column name
preferred = names.find { geometryColumn == it.localPart }

if (preferred == null) {
// otherwise use first one
preferred = names.iterator().next()
}

log.info("Identified property $preferred as geometry property for type ${type.name.localPart}")

preferred
Function<TypeDefinition, Collection<PropertyDefinition>> lister = { TypeDefinition tt ->
getProperties(tt)
}
GeometryPropertyFinder.findGeometryProperty(type, lister, geometryColumn, log)
}
}

Expand Down Expand Up @@ -284,5 +234,4 @@ class TableInstanceBuilder {
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ Require-Bundle: org.junit;bundle-version="4.13.0",
org.eclipse.core.runtime;bundle-version="3.17.100",
eu.esdihumboldt.hale.common.schema.groovy;bundle-version="4.1.0",
eu.esdihumboldt.cst.functions.geometric;bundle-version="4.1.0",
groovy;bundle-version="2.5.19"
groovy;bundle-version="2.5.19",
assertj-core;bundle-version="3.22.0"
Import-Package: de.fhg.igd.slf4jplus,
eu.esdihumboldt.hale.common.filter,
eu.esdihumboldt.hale.common.schema,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Copyright (c) 2024 wetransform GmbH
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* wetransform GmbH <http://www.wetransform.to>
*/

package eu.esdihumboldt.hale.io.shp

import static org.assertj.core.api.Assertions.*;
import static org.junit.Assert.assertEquals
import static org.junit.Assert.assertNotNull
import static org.junit.Assert.assertTrue

import java.nio.charset.StandardCharsets
import java.util.function.Consumer

import org.junit.Test
import org.locationtech.jts.geom.Geometry

import eu.esdihumboldt.hale.common.core.io.impl.LogProgressIndicator
import eu.esdihumboldt.hale.common.core.io.report.IOReport
import eu.esdihumboldt.hale.common.core.io.supplier.DefaultInputSupplier
import eu.esdihumboldt.hale.common.instance.groovy.InstanceAccessor
import eu.esdihumboldt.hale.common.instance.model.Instance
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection
import eu.esdihumboldt.hale.common.schema.geometry.GeometryProperty
import eu.esdihumboldt.hale.common.schema.model.Schema
import eu.esdihumboldt.hale.common.test.TestUtil
import eu.esdihumboldt.hale.io.shp.reader.internal.ShapeInstanceReader
import groovy.transform.CompileStatic
import groovy.transform.TypeCheckingMode

/**
 * Tests for reading Shapefiles with the {@link ShapeInstanceReader}, both
 * with the schema loaded from the Shapefile and with an XML schema.
 *
 * @author Simon Templer
 */
@CompileStatic
class ShapeInstanceReaderTest {

	/** Class path location of the Shapefile read by all tests. */
	private static final String IKG_SHAPEFILE = "testdata/arokfnp/ikg.shp"

	/**
	 * Read the Shapefile using the schema loaded from the Shapefile itself.
	 */
	@Test
	void testReadShapeInstances() {
		readAndValidateIkg(IKG_SHAPEFILE, 'the_geom')
	}

	/**
	 * Read the Shapefile using the schema loaded from an XML schema file.
	 */
	@Test
	void testReadXsdInstances() {
		readAndValidateIkg("testdata/arokfnp/arok-fnp.xsd", 'geometrie')
	}

	/**
	 * Load the schema from the given resource, read the Shapefile with it and
	 * validate the resulting instances.
	 *
	 * @param schemaResource the class path location of the schema to load
	 * @param geometryPropertyName the name of the property expected to hold
	 *            the geometries
	 */
	private void readAndValidateIkg(String schemaResource, String geometryPropertyName) {
		Schema schema = TestUtil.loadSchema(resourceUri(schemaResource))

		InstanceCollection loaded = loadInstances(schema, resourceUri(IKG_SHAPEFILE))

		assertNotNull(loaded)
		List<Instance> all = loaded.toList()

		// test count
		assertThat(all).hasSize(14)

		// instance validation
		validateArokFnpIkg(all, geometryPropertyName)
	}

	/**
	 * Resolve a class path resource via the class loader of this test class.
	 *
	 * @param path the resource path
	 * @return the resource location as URI
	 */
	private URI resourceUri(String path) {
		getClass().getClassLoader().getResource(path).toURI()
	}

	/**
	 * Validate the instances read from the ikg Shapefile: a single type
	 * <code>ikg</code> with 14 instances, the expected <code>aktennr</code>
	 * and <code>bezeich</code> values and a geometry with CRS for every
	 * instance.
	 *
	 * @param instances the instances to check
	 * @param geometryPropertyName the name of the property holding the
	 *            geometries
	 */
	@CompileStatic(TypeCheckingMode.SKIP)
	private void validateArokFnpIkg(List<Instance> instances, String geometryPropertyName) {
		// group the instances by the display name of their type
		Map<String, List<Instance>> instancesByType = instances.groupBy { Instance instance ->
			instance.getDefinition().displayName
		}

		// check count of feature types
		assertThat(instancesByType)
				.hasSize(1)
				.containsOnlyKeys('ikg')

		// check counts per type
		assertEquals(14, instancesByType['ikg'].size())

		// check ikg; a fresh accessor is created for each navigation
		Closure accessor = { -> new InstanceAccessor(instancesByType['ikg']) }

		// only two instances carry a (Groovy-truthy) aktennr value
		def fileNumbers = accessor().aktennr.values().findAll()
		assertThat(fileNumbers)
				.hasSize(2)

		def designations = accessor().bezeich.values()
		assertThat(designations)
				.containsExactlyInAnyOrder(
				'Dußlingen-Gomaringen-Nehren, Musburg-Hönisch',
				'Engstingen-Hohenstein-Trochtelfingen, Haidt',
				'FNP Meersburg (GVV)',
				'IKG Berg',
				'Immenstaad-Friedrichshafen, Steigwiesen',
				'Kirchberg-Weihungstal (Staig), Gassenäcker',
				'Laichinger Alb',
				'Meßkirch, Industriepark Nördlicher Bodensee',
				'Munderkingen',
				'Ostrach, Königsegg',
				'Reutlingen-Kirchentellinsfurt, Mahden',
				'Reutlingen-Kusterdingen, Mark West',
				'Wangen-Amtzell, Geiselharz-Schauwies',
				'Winterlingen-Straßberg, Vogelherd/Längenfeld')

		// every instance must provide a geometry including a CRS
		def geometries = accessor()."$geometryPropertyName".values().findAll()
		assertThat(geometries)
				.as('ikg geometries')
				.hasSize(14)
				.allSatisfy({ geometryValue ->
					assertThat(geometryValue)
							.isInstanceOf(GeometryProperty)
					assertThat(geometryValue.geometry)
							.isNotNull()
							.isInstanceOf(Geometry)
					assertThat(geometryValue.CRSDefinition)
							.isNotNull()
					assertThat(geometryValue.CRSDefinition.CRS)
							.isNotNull()
				} as Consumer)
	}

	// helpers

	/**
	 * Load an instance collection from a Shapefile. Asserts that the reader
	 * reports success and no errors.
	 *
	 * @param schema the schema to use
	 * @param resource the file to load
	 * @return the loaded instance collection
	 */
	static InstanceCollection loadInstances(Schema schema, URI resource) {
		ShapeInstanceReader shapeReader = new ShapeInstanceReader()

		shapeReader.setSource(new DefaultInputSupplier(resource))
		shapeReader.setSourceSchema(schema)

		shapeReader.setCharset(StandardCharsets.UTF_8)

		IOReport result = shapeReader.execute(new LogProgressIndicator())

		assertTrue(result.isSuccess())
		assertTrue(result.getErrors().isEmpty())

		shapeReader.getInstances()
	}
}
Loading

0 comments on commit 0f64ad3

Please sign in to comment.