diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java index a88889cb27e..5861d027e79 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java @@ -80,31 +80,29 @@ public H3InclusionIndexFilterOperator(IndexSegment segment, QueryContext queryCo @Override protected BlockDocIdSet getTrues() { - // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover. - Pair fullCoverAndPotentialCoverCells = _queryContext - .getOrComputeSharedValue(Pair.class, LITERAL_H3_CELLS_CACHE_NAME, + // NOTE: LHS is the fully covered cells, RHS is the potentially covered cells (excluding fully covered cells). + Pair fullyAndPotentiallyCoveredCells = + _queryContext.getOrComputeSharedValue(Pair.class, LITERAL_H3_CELLS_CACHE_NAME, k -> H3Utils.coverGeometryInH3(_geometry, _h3IndexReader.getH3IndexResolution().getLowestResolution())); - LongSet fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft(); - LongSet potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight(); + LongSet fullyCoveredCells = fullyAndPotentiallyCoveredCells.getLeft(); + LongSet potentiallyCoveredCells = fullyAndPotentiallyCoveredCells.getRight(); - int i = 0; - ImmutableRoaringBitmap[] fullMatchDocIds = new ImmutableRoaringBitmap[fullyCoverH3Cells.size()]; - LongIterator fullyCoverH3CellsIterator = fullyCoverH3Cells.iterator(); - while (fullyCoverH3CellsIterator.hasNext()) { - fullMatchDocIds[i++] = _h3IndexReader.getDocIds(fullyCoverH3CellsIterator.nextLong()); + int numFullyCoveredCells = fullyCoveredCells.size(); + ImmutableRoaringBitmap[] fullMatchDocIds = new ImmutableRoaringBitmap[numFullyCoveredCells]; + LongIterator fullyCoveredCellsIterator = fullyCoveredCells.iterator(); + for (int i = 0; i < numFullyCoveredCells; i++) { + fullMatchDocIds[i] = _h3IndexReader.getDocIds(fullyCoveredCellsIterator.nextLong()); } MutableRoaringBitmap fullMatch = BufferFastAggregation.or(fullMatchDocIds); - // remove full match from potential match to get potential not match cells. - i = 0; - potentialCoverH3Cells.removeAll(fullyCoverH3Cells); - ImmutableRoaringBitmap[] potentialNotMatchDocIds = new ImmutableRoaringBitmap[potentialCoverH3Cells.size()]; - LongIterator potentialMatchH3CellsIterator = potentialCoverH3Cells.iterator(); - while (potentialMatchH3CellsIterator.hasNext()) { - potentialNotMatchDocIds[i++] = _h3IndexReader.getDocIds(potentialMatchH3CellsIterator.nextLong()); + int numPotentiallyCoveredCells = potentiallyCoveredCells.size(); + ImmutableRoaringBitmap[] potentialMatchDocIds = new ImmutableRoaringBitmap[numPotentiallyCoveredCells]; + LongIterator potentiallyCoveredCellsIterator = potentiallyCoveredCells.iterator(); + for (int i = 0; i < numPotentiallyCoveredCells; i++) { + potentialMatchDocIds[i] = _h3IndexReader.getDocIds(potentiallyCoveredCellsIterator.nextLong()); } - MutableRoaringBitmap potentialMatch = BufferFastAggregation.or(potentialNotMatchDocIds); + MutableRoaringBitmap potentialMatch = BufferFastAggregation.or(potentialMatchDocIds); if (_isPositiveCheck) { return getFilterBlock(fullMatch, potentialMatch); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java index 706a85e3888..2c0b995199f 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java @@ -20,10 +20,9 @@ import com.uber.h3core.H3CoreV3; import com.uber.h3core.util.LatLng; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongSet; +import it.unimi.dsi.fastutil.longs.LongSets; import java.io.IOException; import java.util.Arrays; import java.util.Collections; @@ -39,7 +38,6 @@ public class H3Utils { - private H3Utils() { } @@ -53,45 +51,57 @@ private H3Utils() { } } + /** + * Returns the H3 cells that is crossed by the line. + */ private static LongSet coverLineInH3(LineString lineString, int resolution) { - LongSet coveringH3Cells = new LongOpenHashSet(); - LongList endpointH3Cells = new LongArrayList(); - for (Coordinate endpoint : lineString.getCoordinates()) { - endpointH3Cells.add(H3_CORE.geoToH3(endpoint.y, endpoint.x, resolution)); + Coordinate[] endPoints = lineString.getCoordinates(); + int numEndPoints = endPoints.length; + if (numEndPoints == 0) { + return LongSets.EMPTY_SET; } - for (int i = 0; i < endpointH3Cells.size() - 1; i++) { - try { - coveringH3Cells.addAll(H3_CORE.h3Line(endpointH3Cells.getLong(i), endpointH3Cells.getLong(i + 1))); - } catch (Exception e) { - throw new RuntimeException(e); - } + long previousCell = H3_CORE.geoToH3(endPoints[0].y, endPoints[0].x, resolution); + if (numEndPoints == 1) { + return LongSets.singleton(previousCell); + } + LongSet coveringCells = new LongOpenHashSet(); + for (int i = 1; i < numEndPoints; i++) { + long currentCell = H3_CORE.geoToH3(endPoints[i].y, endPoints[i].x, resolution); + coveringCells.addAll(H3_CORE.h3Line(previousCell, currentCell)); + previousCell = currentCell; } - return coveringH3Cells; + return coveringCells; } + /** + * Returns the H3 cells that is fully covered and potentially covered (excluding fully covered) by the polygon. + */ private static Pair coverPolygonInH3(Polygon polygon, int resolution) { + // TODO: this can be further optimized to use native H3 implementation. They have plan to support natively. + // https://github.com/apache/pinot/issues/8547 List polyfillCells = H3_CORE.polyfill(Arrays.stream(polygon.getExteriorRing().getCoordinates()) .map(coordinate -> new LatLng(coordinate.y, coordinate.x)).collect(Collectors.toList()), Collections.emptyList(), resolution); - // TODO: this can be further optimized to use native H3 implementation. They have plan to support natively. - // https://github.com/apache/pinot/issues/8547 - LongSet potentialH3Cells = new LongOpenHashSet(); if (polyfillCells.isEmpty()) { // If the polyfill cells are empty, meaning the polygon might be smaller than a single cell in the H3 system. // So just get whatever one. here choose the first one. the follow up kRing(firstCell, 1) will cover the whole // polygon if there is potential not covered by the first point's belonging cell. // ref: https://github.com/uber/h3/issues/456#issuecomment-827760163 Coordinate represent = polygon.getCoordinate(); - potentialH3Cells.add(H3_CORE.geoToH3(represent.getY(), represent.getX(), resolution)); - } else { - potentialH3Cells.addAll(polyfillCells); + return Pair.of(LongSets.EMPTY_SET, + new LongOpenHashSet(H3_CORE.kRing(H3_CORE.geoToH3(represent.y, represent.x, resolution), 1))); + } + + LongSet fullyCoveredCells = new LongOpenHashSet(); + LongSet potentiallyCoveredCells = new LongOpenHashSet(polyfillCells); + for (long cell : polyfillCells) { + if (polygon.contains(createPolygonFromH3Cell(cell))) { + fullyCoveredCells.add(cell); + } + potentiallyCoveredCells.addAll(H3_CORE.kRing(cell, 1)); } - LongSet fullyContainedCell = new LongOpenHashSet( - potentialH3Cells.stream().filter(h3Cell -> polygon.contains(createPolygonFromH3Cell(h3Cell))) - .collect(Collectors.toSet())); - potentialH3Cells - .addAll(potentialH3Cells.stream().flatMap(cell -> H3_CORE.kRing(cell, 1).stream()).collect(Collectors.toSet())); - return Pair.of(fullyContainedCell, potentialH3Cells); + potentiallyCoveredCells.removeAll(fullyCoveredCells); + return Pair.of(fullyCoveredCells, potentiallyCoveredCells); } private static Polygon createPolygonFromH3Cell(long h3Cell) { @@ -101,27 +111,28 @@ private static Polygon createPolygonFromH3Cell(long h3Cell) { boundary.stream().map(geoCoord -> new Coordinate(geoCoord.lng, geoCoord.lat)).toArray(Coordinate[]::new)); } - // Return a pair of cell ids: The first fully contain, the second is potential contain. - // potential contains contain the fully contain. + /** + * Returns the H3 cells that is fully covered and potentially covered (excluding fully covered) by the geometry. + */ public static Pair coverGeometryInH3(Geometry geometry, int resolution) { if (geometry instanceof Point) { - LongSet potentialCover = new LongOpenHashSet(); - potentialCover.add(H3_CORE.geoToH3(geometry.getCoordinate().y, geometry.getCoordinate().x, resolution)); - return Pair.of(new LongOpenHashSet(), potentialCover); + return Pair.of(LongSets.EMPTY_SET, + LongSets.singleton(H3_CORE.geoToH3(geometry.getCoordinate().y, geometry.getCoordinate().x, resolution))); } else if (geometry instanceof LineString) { - LongSet potentialCover = new LongOpenHashSet(); - potentialCover.addAll(coverLineInH3(((LineString) geometry), resolution)); - return Pair.of(new LongOpenHashSet(), potentialCover); + return Pair.of(LongSets.EMPTY_SET, coverLineInH3(((LineString) geometry), resolution)); } else if (geometry instanceof Polygon) { return coverPolygonInH3(((Polygon) geometry), resolution); } else if (geometry instanceof GeometryCollection) { - LongOpenHashSet fullCover = new LongOpenHashSet(); - LongOpenHashSet potentialCover = new LongOpenHashSet(); - for (int i = 0; i < geometry.getNumGeometries(); i++) { - fullCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getLeft()); - potentialCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getRight()); + LongOpenHashSet fullyCoveredCells = new LongOpenHashSet(); + LongOpenHashSet potentiallyCoveredCells = new LongOpenHashSet(); + int numGeometries = geometry.getNumGeometries(); + for (int i = 0; i < numGeometries; i++) { + Pair pair = coverGeometryInH3(geometry.getGeometryN(i), resolution); + fullyCoveredCells.addAll(pair.getLeft()); + potentiallyCoveredCells.addAll(pair.getRight()); } - return Pair.of(fullCover, potentialCover); + potentiallyCoveredCells.removeAll(fullyCoveredCells); + return Pair.of(fullyCoveredCells, potentiallyCoveredCells); } else { throw new UnsupportedOperationException("Unexpected type: " + geometry.getGeometryType()); }