diff --git a/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/data/filter_doubles/filter_with_doubles.xml b/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/data/filter_doubles/filter_with_doubles.xml index 8d746da5e8..0cf1afe0dd 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/data/filter_doubles/filter_with_doubles.xml +++ b/io/plugins/eu.esdihumboldt.hale.io.gml.test/src/data/filter_doubles/filter_with_doubles.xml @@ -64,7 +64,7 @@ - + 48.878401 10.043646 @@ -167,7 +167,16 @@ 66-02/8 1993-04-22 BauNVO 1990 zul. geändert 22.04.1993 - + + + + + + 0 + 66-02/8 + 1993-04-22 + BauNVO 1990 zul. geändert 22.04.1993 + diff --git a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/DuplicateIDsFilterIterator.java b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/DuplicateIDsFilterIterator.java index 9266eceb1f..97115a09d5 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/DuplicateIDsFilterIterator.java +++ b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/DuplicateIDsFilterIterator.java @@ -26,11 +26,10 @@ import eu.esdihumboldt.hale.io.gml.geometry.GMLConstants; /** - * Filter the IDs + * Filter the instances by gml:id, storing only those with unique IDs. */ public class DuplicateIDsFilterIterator implements InstanceIterator { - // store the "main" instances of the GML private final HashSet uniqueIDInstances = new HashSet(); private final InstanceIterator iterator; private Instance nextInstance; @@ -93,24 +92,23 @@ public void skip() { public boolean hasNext() { if (nextInstance == null || instranceAlreadyReturned) { - if (!iterator.hasNext()) { - return false; - } - else { - Instance instance = iterator.next(); - if (!isTheInstancePresent(instance)) { - nextInstance = instance; - instranceAlreadyReturned = false; - return true; + while (true) { + if (iterator != null && !iterator.hasNext()) { + return false; } else { - nextInstance = null; - instranceAlreadyReturned = true; - return false; + Instance instance = iterator.next(); + if (!isTheInstancePresent(instance)) { + nextInstance = instance; + instranceAlreadyReturned = false; + return true; + } } } } - return true; + else { + return true; + } } /** @@ -138,16 +136,10 @@ private boolean isTheInstancePresent(Instance instance) { String gmlIDToCheck = (String) gmlID[0]; if (!uniqueIDInstances.contains(gmlIDToCheck)) { - System.out.println("ADD gmlIDToCheck:" + gmlIDToCheck + " " - + uniqueIDInstances.contains(gmlIDToCheck) + ", size: " - + uniqueIDInstances.size()); uniqueIDInstances.add(gmlIDToCheck); return false; } else { - System.out.println("gmlIDToCheck:" + gmlIDToCheck + " " - + uniqueIDInstances.contains(gmlIDToCheck) + ", size: " - + uniqueIDInstances.size()); return true; } diff --git a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java index f338ad8b86..df74f41b8b 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java +++ b/io/plugins/eu.esdihumboldt.hale.io.gml/src/eu/esdihumboldt/hale/io/gml/reader/internal/wfs/WfsBackedGmlInstanceCollection.java @@ -23,13 +23,10 @@ import java.net.URL; import java.text.MessageFormat; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; -import javax.xml.namespace.QName; - import org.apache.commons.io.FileUtils; import org.apache.http.NameValuePair; import org.apache.http.client.utils.URIBuilder; @@ -50,8 +47,8 @@ import eu.esdihumboldt.hale.common.instance.model.impl.IndexInstanceReference; import eu.esdihumboldt.hale.common.schema.model.TypeDefinition; import eu.esdihumboldt.hale.common.schema.model.TypeIndex; -import eu.esdihumboldt.hale.io.gml.geometry.GMLConstants; import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection; +import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection.GmlInstanceIterator; import eu.esdihumboldt.hale.io.gml.reader.internal.instance.StreamGmlInstance; /** @@ -309,6 +306,9 @@ public int size() { */ @Override public InstanceIterator iterator() { + if (primordialQueryParams.containsKey("RESOLVEDEPTH")) { + return new DuplicateIDsFilterIterator(new WfsBackedGmlInstanceIterator()); + } return new WfsBackedGmlInstanceIterator(); } @@ -420,12 +420,8 @@ private String getMaxFeaturesParameterName(String version) { public class WfsBackedGmlInstanceIterator implements InstanceIterator { private GmlInstanceCollection currentCollection; - private InstanceIterator iterator; + private GmlInstanceIterator iterator; private int totalFeaturesProcessed; - // store the additional objects - private final HashSet uniqueIDInstancesAdditionalObjects = new HashSet(); - // store the "main" features of the GML - private final HashSet uniqueIDMainInstances = new HashSet(); /** * Create the iterator @@ -440,7 +436,9 @@ public WfsBackedGmlInstanceIterator() { * no result, this {@link WfsBackedGmlInstanceIterator} is closed. */ private void proceedOrClose() { - iterator.close(); + if (iterator != null) { + iterator.close(); + } if (!isPaged() || isFeatureLimitReached()) { close(); @@ -448,7 +446,7 @@ private void proceedOrClose() { else { createNextIterator(); - if (!iterator.hasNext()) { + if (iterator != null && !iterator.hasNext()) { close(); } } @@ -491,9 +489,6 @@ private void createNextIterator() { sourceSchema, restrictToFeatures, ignoreRoot, strict, ignoreNamespaces, crsProvider, ioProvider); iterator = currentCollection.iterator(); - if (primordialQueryParams.containsKey("RESOLVEDEPTH")) { - iterator = new DuplicateIDsFilterIterator(iterator); - } // Make sure root element is processed by the iterator iterator.hasNext(); @@ -566,12 +561,12 @@ public boolean hasNext() { * @return true if the number of features processed is equal to (or * exceeds) the maximum number of features to processed or the * number of results reported by the WFS. + * + * The !iterator.hasNext() condition is necessary to ensure that + * instances from additionalObjects are processed after the + * final 'main' instance. */ protected boolean isFeatureLimitReached() { - // the condition (totalFeaturesProcessed >= size && - // !iterator.hasNext()) should be there in order to process the - // instances coming from the additionalObjects after the last "main" - // instance return (maxNumberOfFeatures != UNLIMITED && totalFeaturesProcessed >= maxNumberOfFeatures) || (size != UNKNOWN_SIZE && totalFeaturesProcessed >= size @@ -580,6 +575,10 @@ protected boolean isFeatureLimitReached() { /** * @see java.util.Iterator#next() + * + * Condition: if the instance is part of additional objects then + * that instance is it added but is not counted for the + * totalFeaturesProcessed */ @Override public Instance next() { @@ -588,96 +587,18 @@ public Instance next() { } Instance instance = iterator.next(); - return new StreamGmlInstance(instance, totalFeaturesProcessed++); - } - - /** - * @param instance Instance - * @return Instance - */ - private Instance processInstanceWithResolveDepth(Instance instance) { - for (QName propertyName : instance.getPropertyNames()) { - if (isGmlIdProperty(propertyName)) { - Object[] gmlID = instance.getProperty(propertyName); - if (gmlID[0] != null) { - String gmlIDToCheck = (String) gmlID[0]; - - if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null - && !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) - .isEmpty()) { - if (!uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { - if (uniqueIDMainInstances.contains(gmlIDToCheck)) { - if (iterator.hasNext()) { - return next(); - } - } - uniqueIDInstancesAdditionalObjects.add(gmlIDToCheck); - System.out - .println("totalFeaturesProcessed:" + totalFeaturesProcessed - + " - uniqueIDInstancesAdditionalObjects:" - + uniqueIDInstancesAdditionalObjects.size() - + " ADD to additional"); - return new StreamGmlInstance(instance, totalFeaturesProcessed); - } - } - else { - if (!uniqueIDMainInstances.contains(gmlIDToCheck)) { - uniqueIDMainInstances.add(gmlIDToCheck); - totalFeaturesProcessed++; - if (uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) { - uniqueIDInstancesAdditionalObjects.remove(gmlIDToCheck); - System.out.println( - "totalFeaturesProcessed:" + totalFeaturesProcessed - + " - uniqueIDInstancesAdditionalObjects:" - + uniqueIDInstancesAdditionalObjects.size() - + " Exists in ADDITIONAL SKIP"); - if (iterator.hasNext()) { - return next(); - } - } - System.out - .println("totalFeaturesProcessed:" + totalFeaturesProcessed - + " - uniqueIDInstancesAdditionalObjects:" - + uniqueIDInstancesAdditionalObjects.size() - + " ADD to main"); - return new StreamGmlInstance(instance, totalFeaturesProcessed); - } - } - } - } - } - return processRemainingInstances(); - - } + boolean hasAdditionalObjects = instance + .getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null + && !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS).isEmpty(); - private Instance processRemainingInstances() { - if (iterator != null && iterator.hasNext()) { - return next(); + if (primordialQueryParams.containsKey("RESOLVEDEPTH") && hasAdditionalObjects) { + return new StreamGmlInstance(instance, totalFeaturesProcessed); } else { - closeAndRecreateIterator(); - if (iterator != null && iterator.hasNext()) { - return next(); - } - else { -// return iterator.next(); - throw new NoSuchElementException(); - } + return new StreamGmlInstance(instance, totalFeaturesProcessed++); } } - private void closeAndRecreateIterator() { - close(); - createNextIterator(); - } - - private boolean isGmlIdProperty(QName propertyName) { - return (propertyName.getNamespaceURI().startsWith(GMLConstants.NS_WFS) - || propertyName.getNamespaceURI().startsWith(GMLConstants.GML_NAMESPACE_CORE)) - && "id".equals(propertyName.getLocalPart()) - && "gml".equals(propertyName.getPrefix()); - } - /** * @see eu.esdihumboldt.hale.common.instance.model.ResourceIterator#close() */