Skip to content

Commit

Permalink
Arrow/Parquet: optimize spatial filtering on GeoArrow struct encoding…
Browse files Browse the repository at this point in the history
… when there is no bbox column
  • Loading branch information
rouault committed Mar 31, 2024
1 parent 8db1700 commit 5982e63
Show file tree
Hide file tree
Showing 4 changed files with 739 additions and 142 deletions.
38 changes: 36 additions & 2 deletions autotest/ogr/ogr_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3511,8 +3511,11 @@ def check_file(filename):
],
)
@pytest.mark.parametrize("check_with_pyarrow", [True, False])
@pytest.mark.parametrize("covering_bbox", [True, False])
@gdaltest.enable_exceptions()
def test_ogr_parquet_geoarrow(tmp_vsimem, tmp_path, wkt, check_with_pyarrow):
def test_ogr_parquet_geoarrow(
tmp_vsimem, tmp_path, wkt, check_with_pyarrow, covering_bbox
):

geom = ogr.CreateGeometryFromWkt(wkt)

Expand All @@ -3525,7 +3528,12 @@ def test_ogr_parquet_geoarrow(tmp_vsimem, tmp_path, wkt, check_with_pyarrow):
ds = ogr.GetDriverByName("Parquet").CreateDataSource(filename)

lyr = ds.CreateLayer(
"test", geom_type=geom.GetGeometryType(), options=["GEOMETRY_ENCODING=GEOARROW"]
"test",
geom_type=geom.GetGeometryType(),
options=[
"GEOMETRY_ENCODING=GEOARROW",
"WRITE_COVERING_BBOX=" + ("YES" if covering_bbox else "NO"),
],
)
lyr.CreateField(ogr.FieldDefn("foo"))

Expand Down Expand Up @@ -3598,3 +3606,29 @@ def check(lyr):
lyr = ds.GetLayer(0)
lyr.SetIgnoredFields(["foo"])
check(lyr)

ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
minx, maxx, miny, maxy = geom.GetEnvelope()

lyr.SetSpatialFilter(geom)
assert lyr.GetFeatureCount() == (3 if geom.GetGeometryCount() > 1 else 2)

lyr.SetSpatialFilterRect(maxx + 1, miny, maxx + 2, maxy)
assert lyr.GetFeatureCount() == 0

lyr.SetSpatialFilterRect(minx, maxy + 1, maxx, maxy + 2)
assert lyr.GetFeatureCount() == 0

lyr.SetSpatialFilterRect(minx - 2, miny, minx - 1, maxy)
assert lyr.GetFeatureCount() == 0

lyr.SetSpatialFilterRect(minx, miny - 2, maxx, miny - 1)
assert lyr.GetFeatureCount() == 0
if (
minx != miny
and maxx != maxy
and ogr.GT_Flatten(geom.GetGeometryType()) != ogr.wkbMultiPoint
):
lyr.SetSpatialFilterRect(minx + 0.1, miny + 0.1, maxx - 0.1, maxy - 0.1)
assert lyr.GetFeatureCount() != 0
5 changes: 4 additions & 1 deletion doc/source/drivers/vector/parquet.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ Layer creation options
:since: 3.9

Whether to write xmin/ymin/xmax/ymax columns with the bounding box of
geometries.
geometries. Writing the geometry bounding box may help applications
perform faster spatial filtering. It is less necessary with the GeoArrow
geometry encoding than with the default WKB encoding, as implementations
may be able to use the geometry columns directly.

- .. lco:: SORT_BY_BBOX
:choices: YES, NO
Expand Down
Loading

0 comments on commit 5982e63

Please sign in to comment.