Skip to content

Commit

Permalink
Merge pull request #9686 from rouault/parquet_tune_overture
Browse files Browse the repository at this point in the history
Parquet: recognize bbox field for OvertureMaps 2024-01-17-alpha.0 and 2024-04-16-beta.0
  • Loading branch information
rouault authored Apr 17, 2024
2 parents 324d6b0 + 8133fb4 commit 94135e9
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 5 deletions.
Binary file not shown.
49 changes: 48 additions & 1 deletion autotest/ogr/ogr_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3078,14 +3078,15 @@ def check_file(filename):

###############################################################################
# Test GetExtent() using bbox.minx, bbox.miny, bbox.maxx, bbox.maxy fields
# as in Ouverture Maps datasets
# as in Overture Maps datasets


def test_ogr_parquet_bbox_double():

ds = ogr.Open("data/parquet/overture_map_extract.parquet")
lyr = ds.GetLayer(0)
assert lyr.GetGeometryColumn() == "geometry"
assert lyr.GetLayerDefn().GetFieldIndex("bbox.minx") < 0
assert lyr.TestCapability(ogr.OLCFastGetExtent) == 1
minx, maxx, miny, maxy = lyr.GetExtent()
assert (minx, miny, maxx, maxy) == pytest.approx(
Expand All @@ -3109,6 +3110,7 @@ def test_ogr_parquet_bbox_double():
ds = ogr.Open("data/parquet/overture_map_extract.parquet")
lyr = ds.GetLayer(0)
assert lyr.GetGeometryColumn() == "geometry"
assert lyr.GetLayerDefn().GetFieldIndex("bbox.minx") >= 0
assert lyr.TestCapability(ogr.OLCFastGetExtent) == 0
minx, maxx, miny, maxy = lyr.GetExtent()
assert (minx, miny, maxx, maxy) == pytest.approx(
Expand All @@ -3117,6 +3119,51 @@ def test_ogr_parquet_bbox_double():
ds = None


###############################################################################
# Test GetExtent() using bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax fields
# as in Overture Maps datasets 2024-04-16-beta.0


def test_ogr_parquet_bbox_float32_but_no_covering_in_metadata():
    # The same dataset is opened twice: once with default settings and once
    # with the OGR_PARQUET_USE_BBOX configuration option set to NO.
    filename = "data/parquet/bbox_similar_to_overturemaps_2024-04-16-beta.0.parquet"
    expected_extent = (478315.53125, 4762880.5, 481645.3125, 4765610.5)

    # Default settings: "bbox.xmin" must not be exposed as a regular field,
    # and the layer must advertise the fast GetExtent() capability.
    ds = ogr.Open(filename)
    lyr = ds.GetLayer(0)
    assert lyr.GetGeometryColumn() == "geometry"
    assert lyr.GetLayerDefn().GetFieldIndex("bbox.xmin") < 0
    assert lyr.TestCapability(ogr.OLCFastGetExtent) == 1
    minx, maxx, miny, maxy = lyr.GetExtent()
    assert (minx, miny, maxx, maxy) == pytest.approx(expected_extent)

    # Spatial filter derived from the half-width / half-height of the extent;
    # exactly one feature (FID 8) is expected to be returned.
    half_dx = (maxx - minx) / 2
    half_dy = (maxy - miny) / 2
    with ogrtest.spatial_filter(
        lyr, minx + half_dx, miny + half_dy, maxx - half_dx, maxy - half_dy
    ):
        feat = lyr.GetNextFeature()
        assert feat.GetFID() == 8
        assert lyr.GetNextFeature() is None

    ds = None

    # With OGR_PARQUET_USE_BBOX=NO: "bbox.xmin" must now be a regular field,
    # the fast GetExtent() capability must be off, and the extent unchanged.
    with gdaltest.config_option("OGR_PARQUET_USE_BBOX", "NO"):
        ds = ogr.Open(filename)
        lyr = ds.GetLayer(0)
        assert lyr.GetGeometryColumn() == "geometry"
        assert lyr.GetLayerDefn().GetFieldIndex("bbox.xmin") >= 0
        assert lyr.TestCapability(ogr.OLCFastGetExtent) == 0
        xmin, xmax, ymin, ymax = lyr.GetExtent()
        assert (xmin, ymin, xmax, ymax) == pytest.approx(expected_extent)
        ds = None


###############################################################################


Expand Down
5 changes: 4 additions & 1 deletion ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,10 @@ inline void OGRArrowWriterLayer::CreateSchemaCommon()
auto bbox_field_xmax(arrow::field("xmax", arrow::float32(), false));
auto bbox_field_ymax(arrow::field("ymax", arrow::float32(), false));
auto bbox_field(arrow::field(
std::string(poGeomFieldDefn->GetNameRef()).append("_bbox"),
CPLGetConfigOption("OGR_PARQUET_COVERING_BBOX_NAME",
std::string(poGeomFieldDefn->GetNameRef())
.append("_bbox")
.c_str()),
arrow::struct_(
{std::move(bbox_field_xmin), std::move(bbox_field_ymin),
std::move(bbox_field_xmax), std::move(bbox_field_ymax)}),
Expand Down
65 changes: 63 additions & 2 deletions ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,8 +554,14 @@ void OGRParquetLayer::EstablishFeatureDefn()
}

// Synthetize a GeoParquet bounding box column definition when detecting
// a Overture Map dataset
if (m_oMapGeometryColumns.empty() && bUseBBOX &&
// an Overture Maps dataset < 2024-04-16-beta.0
if ((m_oMapGeometryColumns.empty() ||
// Below is for release 2024-01-17-alpha.0
(m_oMapGeometryColumns.find("geometry") !=
m_oMapGeometryColumns.end() &&
!m_oMapGeometryColumns["geometry"].GetObj("covering").IsValid() &&
m_oMapGeometryColumns["geometry"].GetString("encoding") == "WKB")) &&
bUseBBOX &&
oMapParquetColumnNameToIdx.find("geometry") !=
oMapParquetColumnNameToIdx.end() &&
oMapParquetColumnNameToIdx.find("bbox.minx") !=
Expand All @@ -568,6 +574,11 @@ void OGRParquetLayer::EstablishFeatureDefn()
oMapParquetColumnNameToIdx.end())
{
CPLJSONObject oDef;
if (m_oMapGeometryColumns.find("geometry") !=
m_oMapGeometryColumns.end())
{
oDef = m_oMapGeometryColumns["geometry"];
}
CPLJSONObject oCovering;
oDef.Add("covering", oCovering);
CPLJSONObject oBBOX;
Expand Down Expand Up @@ -600,6 +611,56 @@ void OGRParquetLayer::EstablishFeatureDefn()
oDef.Add("encoding", "WKB");
m_oMapGeometryColumns["geometry"] = std::move(oDef);
}
// Overture Maps 2024-04-16-beta.0 almost follows GeoParquet 1.1, except
// they don't declare the "covering" element in the GeoParquet JSON metadata
else if (m_oMapGeometryColumns.find("geometry") !=
m_oMapGeometryColumns.end() &&
bUseBBOX &&
!m_oMapGeometryColumns["geometry"].GetObj("covering").IsValid() &&
m_oMapGeometryColumns["geometry"].GetString("encoding") == "WKB" &&
oMapParquetColumnNameToIdx.find("geometry") !=
oMapParquetColumnNameToIdx.end() &&
oMapParquetColumnNameToIdx.find("bbox.xmin") !=
oMapParquetColumnNameToIdx.end() &&
oMapParquetColumnNameToIdx.find("bbox.ymin") !=
oMapParquetColumnNameToIdx.end() &&
oMapParquetColumnNameToIdx.find("bbox.xmax") !=
oMapParquetColumnNameToIdx.end() &&
oMapParquetColumnNameToIdx.find("bbox.ymax") !=
oMapParquetColumnNameToIdx.end())
{
CPLJSONObject oDef = m_oMapGeometryColumns["geometry"];
CPLJSONObject oCovering;
oDef.Add("covering", oCovering);
CPLJSONObject oBBOX;
oCovering.Add("bbox", oBBOX);
{
CPLJSONArray oArray;
oArray.Add("bbox");
oArray.Add("xmin");
oBBOX.Add("xmin", oArray);
}
{
CPLJSONArray oArray;
oArray.Add("bbox");
oArray.Add("ymin");
oBBOX.Add("ymin", oArray);
}
{
CPLJSONArray oArray;
oArray.Add("bbox");
oArray.Add("xmax");
oBBOX.Add("xmax", oArray);
}
{
CPLJSONArray oArray;
oArray.Add("bbox");
oArray.Add("ymax");
oBBOX.Add("ymax", oArray);
}
oSetBBOXColumns.insert("bbox");
m_oMapGeometryColumns["geometry"] = std::move(oDef);
}

int iParquetCol = 0;
for (int i = 0; i < m_poSchema->num_fields(); ++i)
Expand Down
4 changes: 3 additions & 1 deletion ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,9 @@ std::string OGRParquetWriterLayer::GetGeoMetadata() const
}

// Bounding box column definition
if (m_bWriteBBoxStruct)
if (m_bWriteBBoxStruct &&
CPLTestBool(CPLGetConfigOption(
"OGR_PARQUET_WRITE_COVERING_BBOX_IN_METADATA", "YES")))
{
CPLJSONObject oCovering;
oColumn.Add("covering", oCovering);
Expand Down

0 comments on commit 94135e9

Please sign in to comment.