diff --git a/apps/ogr2ogr_lib.cpp b/apps/ogr2ogr_lib.cpp index 14e863771fc0..084e9ef3486d 100644 --- a/apps/ogr2ogr_lib.cpp +++ b/apps/ogr2ogr_lib.cpp @@ -3883,7 +3883,9 @@ bool SetupTargetLayer::CanUseWriteArrowBatch( !psOptions->bMakeValid) { struct ArrowArrayStream streamSrc; - if (poSrcLayer->GetArrowStream(&streamSrc, nullptr)) + const char *const apszOptions[] = {"SILENCE_GET_SCHEMA_ERROR=YES", + nullptr}; + if (poSrcLayer->GetArrowStream(&streamSrc, apszOptions)) { struct ArrowSchema schemaSrc; if (streamSrc.get_schema(&streamSrc, &schemaSrc) == 0) diff --git a/autotest/generate_parquet_test_file.py b/autotest/generate_parquet_test_file.py index b0c89261ad65..06ae46e218e3 100644 --- a/autotest/generate_parquet_test_file.py +++ b/autotest/generate_parquet_test_file.py @@ -521,8 +521,11 @@ def generate_test_parquet(): type=pa.binary(), ) + null = pa.array([None] * 5, type=pa.null()) + names = [ "boolean", + "null", "uint8", "int8", "uint16", diff --git a/autotest/ogr/data/arrow/test.feather b/autotest/ogr/data/arrow/test.feather index ad4fe9612ec7..219512352f71 100644 Binary files a/autotest/ogr/data/arrow/test.feather and b/autotest/ogr/data/arrow/test.feather differ diff --git a/autotest/ogr/data/parquet/test.parquet b/autotest/ogr/data/parquet/test.parquet index c3b1d4cc13dc..846543134db4 100644 Binary files a/autotest/ogr/data/parquet/test.parquet and b/autotest/ogr/data/parquet/test.parquet differ diff --git a/autotest/ogr/data/parquet/test_single_group.parquet b/autotest/ogr/data/parquet/test_single_group.parquet index c01a5e6191bd..d6063cb13079 100644 Binary files a/autotest/ogr/data/parquet/test_single_group.parquet and b/autotest/ogr/data/parquet/test_single_group.parquet differ diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index d0506ebef3a2..5b72692f4109 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -4286,6 +4286,17 @@ 
/** Tell whether an Arrow column must be silently stripped, based on its
 * format string.
 *
 * The comparison is an exact, case-sensitive match against a fixed table of
 * format strings. Currently the table only contains "n" (the null data type).
 *
 * @param format Format string of the column (typically the 'format' member
 *               of a struct ArrowSchema). May be nullptr, in which case the
 *               column is not considered as silently ignored.
 * @return true if the format is one of the silently ignored ones.
 */
static bool IsSilentlyIgnoredFormatForGetArrowSchemaArray(const char *format)
{
    // strcmp() on a null pointer is undefined behavior: a missing format
    // string cannot match any entry of the table.
    if (format == nullptr)
        return false;

    // n: null
    constexpr const char *apszSilentlyIgnoredFormats[] = {"n"};
    for (const char *pszSilentlyIgnoredFormat : apszSilentlyIgnoredFormats)
    {
        if (strcmp(format, pszSilentlyIgnoredFormat) == 0)
            return true;
    }
    return false;
}
unsupported data types that we voluntarily strip off. + const auto RemoveBBoxOrUnsupportedColumns = [out_array, &schema](const std::set &oSetBBoxArrayIndex) { int j = 0; for (int i = 0; i < static_cast(schema.n_children); ++i) { - if (oSetBBoxArrayIndex.find(i) != oSetBBoxArrayIndex.end()) + if (oSetBBoxArrayIndex.find(i) != oSetBBoxArrayIndex.end() || + IsSilentlyIgnoredFormatForGetArrowSchemaArray( + schema.children[i]->format)) { out_array->children[i]->release(out_array->children[i]); out_array->children[i] = nullptr; @@ -4537,11 +4572,11 @@ inline int OGRArrowLayer::GetNextArrowArray(struct ArrowArrayStream *stream, if (iter.second.iArrayIdx >= 0) oSetBBoxArrayIndex.insert(iter.second.iArrayIdx); } - RemoveBBoxColumns(oSetBBoxArrayIndex); + RemoveBBoxOrUnsupportedColumns(oSetBBoxArrayIndex); } else { - RemoveBBoxColumns(m_oSetBBoxArrowColumns); + RemoveBBoxOrUnsupportedColumns(m_oSetBBoxArrowColumns); } if (EQUAL(m_aosArrowArrayStreamOptions.FetchNameValueDef(