diff --git a/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/eu/esdihumboldt/hale/io/gml/reader/internal/StreamGmlReaderTest.groovy b/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/eu/esdihumboldt/hale/io/gml/reader/internal/StreamGmlReaderTest.groovy index 2b6885b68b..ff7aa256dd 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/eu/esdihumboldt/hale/io/gml/reader/internal/StreamGmlReaderTest.groovy +++ b/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/eu/esdihumboldt/hale/io/gml/reader/internal/StreamGmlReaderTest.groovy @@ -137,6 +137,41 @@ class StreamGmlReaderTest { assertEquals(expected, count) } + @Test + public void testWfsPagination() { + /* + * FIXME This test queries a live external WFS that is not guaranteed to exist or to keep returning the same number of features, so it is not suitable for automated test runs. NOTE(review): nothing in this hunk disables the test - confirm how it is excluded. + * A better approach would be a test that mocks the WFS responses (e.g. a mock service running with testcontainers). + */ + def schemaUrl = 'https://geodienste.komm.one/ows/services/org.107.7e499bca-5e63-4595-b3c4-eaece8b68608_wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=DescribeFeatureType' + def dataUrl = 'https://geodienste.komm.one/ows/services/org.107.7e499bca-5e63-4595-b3c4-eaece8b68608_wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&typenames=xplan:BP_Plan&resolvedepth=*' + def paging = 100 + def expected = 754 /* hard-coded instance count asserted against the live service response */ + + def schema = loadSchema(URI.create(schemaUrl)) + + Map params = [ + (StreamGmlReader.PARAM_FEATURES_PER_WFS_REQUEST): paging as String, + (StreamGmlReader.PARAM_PAGINATE_REQUEST): 'true' + ] + + def instances = loadGml(URI.create(dataUrl), schema, params) + + int count = 0 + instances.iterator().withCloseable { it -> + while (it.hasNext()) { + ((InstanceIterator) it).skip() + count++ + if (count % 100 == 0) { + println("$count instances skipped") + } + } + } + + println("$count instances skipped") + assertEquals(expected, count) + } + // helpers Schema loadSchema(URI schemaLocation) throws Exception { diff --git 
a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java index 17a7b7f08e..1c5b427b36 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java +++ b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java @@ -238,9 +238,7 @@ public WfsBackedGmlInstanceCollection(LocatableInputSupplier uniqIDInstancesAdditionalObjects = new HashSet(); + private final HashSet uniqueIDInstancesAdditionalObjects = new HashSet(); // store the "main" features of the GML - private final HashSet uniqIDInstances = new HashSet(); + private final HashSet uniqueIDMainInstances = new HashSet(); /** * Create the iterator @@ -575,9 +571,14 @@ public boolean hasNext() { * number of results reported by the WFS. 
*/ protected boolean isFeatureLimitReached() { + // The size reported by the WFS only counts as reached once the current + // iterator has no more elements (!iterator.hasNext()); otherwise the + // instances coming from the additionalObjects after the last "main" + // instance would not be processed. return (maxNumberOfFeatures != UNLIMITED && totalFeaturesProcessed >= maxNumberOfFeatures) - || (size != UNKNOWN_SIZE && totalFeaturesProcessed >= size); + || (size != UNKNOWN_SIZE && totalFeaturesProcessed >= size + && !iterator.hasNext()); } /** @@ -591,31 +592,126 @@ public Instance next() { Instance instance = iterator.next(); - if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null - && !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS).isEmpty()) { + if (primordialQueryParams.containsKey("RESOLVEDEPTH")) { + return processInstanceWithResolveDepth(instance); + } + else { + return new StreamGmlInstance(instance, totalFeaturesProcessed++); + } + } - for (QName propertyName : instance.getPropertyNames()) { - if ((propertyName.getNamespaceURI().startsWith(GMLConstants.NS_WFS) - || propertyName.getNamespaceURI() - .startsWith(GMLConstants.GML_NAMESPACE_CORE)) - && (propertyName.getLocalPart().equals("id") - && propertyName.getPrefix().equals("gml"))) { - Object[] gmlID = instance.getProperty(propertyName); - if (gmlID[0] != null) { - String gmlIDToCheck = (String) gmlID[0]; - - if (!uniqIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { - uniqIDInstancesAdditionalObjects.add(gmlIDToCheck); - additionalFeatureProcessed++; + /** Handles an instance read while the query parameters contain RESOLVEDEPTH: looks up its gml:id and uses the two ID sets to tell instances carrying ADDITIONAL_OBJECTS metadata from the other (main) instances and to skip instances whose ID was already recorded. NOTE(review): gmlID[0] is read without a null or length check on the array. + * @param instance the instance just taken from the current iterator + * @return the instance wrapped as StreamGmlInstance, or whatever next() or processRemainingInstances() yields when this instance is skipped + */ + private Instance processInstanceWithResolveDepth(Instance instance) { + for (QName propertyName : instance.getPropertyNames()) { + if (isGmlIdProperty(propertyName)) { + Object[] gmlID = instance.getProperty(propertyName); + if (gmlID[0] != null) { + String gmlIDToCheck = (String) gmlID[0]; + + if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null + && 
!instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) + .isEmpty()) { /* instance carries ADDITIONAL_OBJECTS metadata */ + if (!uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { + if (uniqueIDMainInstances.contains(gmlIDToCheck)) { + if (iterator.hasNext()) { + return next(); + } + } + uniqueIDInstancesAdditionalObjects.add(gmlIDToCheck); + return new StreamGmlInstance(instance, totalFeaturesProcessed); + } + } + else { /* no ADDITIONAL_OBJECTS metadata: tracked as a main instance */ + if (!uniqueIDMainInstances.contains(gmlIDToCheck)) { + uniqueIDMainInstances.add(gmlIDToCheck); + totalFeaturesProcessed++; + if (uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { + uniqueIDInstancesAdditionalObjects.remove(gmlIDToCheck); + if (iterator.hasNext()) { + return next(); + } + } + return new StreamGmlInstance(instance, totalFeaturesProcessed); } } - return new StreamGmlInstance(instance, totalFeaturesProcessed); } } + } + return processRemainingInstances(); + } + private Instance processRemainingInstances() { /* Advances to the next instance; if the current iterator is exhausted it is closed and the next iterator is created first. */ + if (iterator.hasNext()) { + return next(); + } + else { + _closeAndRecreateIterator(); + if (iterator.hasNext()) { + return next(); + } + else { + return iterator.next(); /* still empty after recreating the iterator: delegates to iterator.next() - presumably signals exhaustion, confirm */ + } } + } - return new StreamGmlInstance(instance, totalFeaturesProcessed++); + private void _closeAndRecreateIterator() { + close(); + createNextIterator(); + } + + private boolean isGmlIdProperty(QName propertyName) { /* True when the namespace starts with NS_WFS or GML_NAMESPACE_CORE, the local part is id and the prefix is gml. */ + return (propertyName.getNamespaceURI().startsWith(GMLConstants.NS_WFS) + || propertyName.getNamespaceURI().startsWith(GMLConstants.GML_NAMESPACE_CORE)) + && "id".equals(propertyName.getLocalPart()) + && "gml".equals(propertyName.getPrefix()); + } + + private String _getGmlId(Instance instance, QName propertyName) { /* First value of the property as String, or null when the value array is null or empty. NOTE(review): no caller visible in this hunk. */ + Object[] gmlID = instance.getProperty(propertyName); + return gmlID != null && gmlID.length > 0 ? 
(String) gmlID[0] : null; + } + + private boolean _handleAdditionalObjects(Instance instance, String gmlIDToCheck) { /* NOTE(review): no caller visible in this hunk; resembles the ADDITIONAL_OBJECTS branch of processInstanceWithResolveDepth but discards the result of next(). */ + if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null + && !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS).isEmpty()) { + if (!uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { + if (uniqueIDMainInstances.contains(gmlIDToCheck) && iterator.hasNext()) { + next(); + System.out.println("totalFeaturesProcessed:" + totalFeaturesProcessed + + " - uniqueIDMainInstances:" + uniqueIDMainInstances.size() + + " - uniqueIDInstancesAdditionalObjects:" + + uniqueIDInstancesAdditionalObjects.size() + " 1SKIP"); /* NOTE(review): debug output written to stdout */ + return true; + } + uniqueIDInstancesAdditionalObjects.add(gmlIDToCheck); + return true; + } + } + return false; + } + + private boolean _handleMainInstances(String gmlIDToCheck) { /* NOTE(review): no caller visible in this hunk; resembles the main-instance branch of processInstanceWithResolveDepth but discards the result of next(). */ + if (!uniqueIDMainInstances.contains(gmlIDToCheck)) { + uniqueIDMainInstances.add(gmlIDToCheck); + totalFeaturesProcessed++; + if (uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { + uniqueIDInstancesAdditionalObjects.remove(gmlIDToCheck); + if (iterator.hasNext()) { + System.out.println("totalFeaturesProcessed:" + totalFeaturesProcessed + + " - uniqueIDMainInstances:" + uniqueIDMainInstances.size() + + " - uniqueIDInstancesAdditionalObjects:" + + uniqueIDInstancesAdditionalObjects.size() + " 2SKIP"); /* NOTE(review): debug output written to stdout */ + next(); + return true; + } + } + return true; + } + return false; } /**