From 8fc33cffb357f16bb9f7280930b406645caa2ac9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 3 Apr 2024 22:55:10 +0200 Subject: [PATCH] PG/PGDUMP: add a LAUNDER_ASCII=YES/NO (default NO) layer creation option ``` - .. lco:: LAUNDER_ASCII :choices: YES, NO :default: NO :since: 3.9 Implies LAUNDER=YES, with the extra substitution of UTF-8 accented characters in the `Latin-1 Supplement `__ and `Latin Extented-A `__ sets with the closest ASCII letter. Other non-ASCII characters are replaced with underscore. Consequently this option is not appropriate for non-Latin languages. ``` --- autotest/ogr/ogr_pg.py | 42 +++++++++++++ autotest/ogr/ogr_pgdump.py | 60 +++++++++++++++++++ doc/source/drivers/vector/pg.rst | 12 ++++ doc/source/drivers/vector/pgdump.rst | 15 ++++- ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp | 2 +- ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp | 5 +- ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp | 3 +- .../gmlas/ogrgmlasschemaanalyzer.cpp | 4 +- ogr/ogrsf_frmts/pg/ogr_pg.h | 6 ++ ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp | 16 +++-- ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp | 2 + ogr/ogrsf_frmts/pg/ogrpgtablelayer.cpp | 11 ++-- ogr/ogrsf_frmts/pgdump/ogr_pgdump.h | 9 ++- .../pgdump/ogrpgdumpdatasource.cpp | 38 +++++++----- ogr/ogrsf_frmts/pgdump/ogrpgdumpdriver.cpp | 2 + ogr/ogrsf_frmts/pgdump/ogrpgdumplayer.cpp | 8 +-- 16 files changed, 197 insertions(+), 38 deletions(-) diff --git a/autotest/ogr/ogr_pg.py b/autotest/ogr/ogr_pg.py index cca1a408cd42..4854688e707b 100755 --- a/autotest/ogr/ogr_pg.py +++ b/autotest/ogr/ogr_pg.py @@ -5913,3 +5913,45 @@ def test_ogr_pg_schema_case_createlayer(pg_ds, tmp_schema): assert lyr is None finally: pg_ds.ExecuteSQL(f'DROP SCHEMA "{tmp_schema_uppercase}" CASCADE') + + +############################################################################### +# Test LAUNDER=YES + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_YES(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = 
pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute + "#", options=["LAUNDER=YES"]) + assert lyr.GetName() == f"{tmp_schema}.a" + eacute + "_" + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + eacute + "_" + + +############################################################################### +# Test LAUNDER=NO + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_NO(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute + "#", options=["LAUNDER=NO"]) + assert lyr.GetName() == f"{tmp_schema}.a" + eacute + "#" + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + eacute + "#" + + +############################################################################### +# Test LAUNDER_ASCII + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_ASCII(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute, options=["LAUNDER_ASCII=YES"]) + assert lyr.GetName() == f"{tmp_schema}.ae" + lyr.CreateField(ogr.FieldDefn("b" + eacute)) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "be" diff --git a/autotest/ogr/ogr_pgdump.py b/autotest/ogr/ogr_pgdump.py index ec92a8491889..fddb6cf7bfe5 100755 --- a/autotest/ogr/ogr_pgdump.py +++ b/autotest/ogr/ogr_pgdump.py @@ -1580,6 +1580,66 @@ def check_and_remove(needle): ) +############################################################################### +# Test LAUNDER=YES + + +def test_ogr_pgdump_LAUNDER_YES(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_YES.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute + "#", options=["LAUNDER=YES"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 
10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"a' + eacute + '_"' in sql + assert '"b' + eacute + '_"' in sql + + +############################################################################### +# Test LAUNDER=NO + + +def test_ogr_pgdump_LAUNDER_NO(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_NO.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute + "#", options=["LAUNDER=NO"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"a' + eacute + '#"' in sql + assert '"b' + eacute + '#"' in sql + + +############################################################################### +# Test LAUNDER_ASCII + + +def test_ogr_pgdump_LAUNDER_ASCII(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_ASCII.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute, options=["LAUNDER_ASCII=YES"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute)) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"ae"' in sql + assert '"be"' in sql + + ############################################################################### # Cleanup diff --git a/doc/source/drivers/vector/pg.rst b/doc/source/drivers/vector/pg.rst index 0a9e1c2749c0..2e5e26c5e7c8 100644 --- a/doc/source/drivers/vector/pg.rst +++ b/doc/source/drivers/vector/pg.rst @@ -268,6 +268,18 @@ Layer Creation Options some special characters like "-" and "#" to "_". If "NO" exact names are preserved. If enabled the table (layer) name will also be laundered. +- .. 
lco:: LAUNDER_ASCII + :choices: YES, NO + :default: NO + :since: 3.9 + + Implies LAUNDER=YES, with the extra substitution of UTF-8 accented + characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__ + and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__ + sets with the closest ASCII letter. Other non-ASCII characters are + replaced with underscore. + Consequently this option is not appropriate for non-Latin languages. + - .. lco:: PRECISION :choices: YES, NO :default: YES diff --git a/doc/source/drivers/vector/pgdump.rst b/doc/source/drivers/vector/pgdump.rst index 7fd0ad96be28..4a689aca4649 100644 --- a/doc/source/drivers/vector/pgdump.rst +++ b/doc/source/drivers/vector/pgdump.rst @@ -63,8 +63,19 @@ Layer Creation Options layer to have their field names "laundered" into a form more compatible with PostgreSQL. This converts to lower case and converts some special characters like "-" and "#" to "_". If "NO" exact names are preserved. If enabled the table - (layer) name will also be laundered. + are preserved. If enabled the table (layer) name will also be laundered. + +- .. lco:: LAUNDER_ASCII + :choices: YES, NO + :default: NO + :since: 3.9 + + Implies LAUNDER=YES, with the extra substitution of UTF-8 accented + characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__ + and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__ + sets with the closest ASCII letter. Other non-ASCII characters are + replaced with underscore. + Consequently this option is not appropriate for non-Latin languages. - ..
lco:: PRECISION :choices: YES, NO diff --git a/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp b/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp index 3b8ca2da213f..a50ec0d778ac 100644 --- a/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp +++ b/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp @@ -458,7 +458,7 @@ OGRCARTODataSource::ICreateLayer(const char *pszNameIn, CPLString osName(pszNameIn); if (CPLFetchBool(papszOptions, "LAUNDER", true)) { - char *pszTmp = OGRPGCommonLaunderName(pszNameIn); + char *pszTmp = OGRPGCommonLaunderName(pszNameIn, "CARTO", false); osName = pszTmp; CPLFree(pszTmp); } diff --git a/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp b/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp index 70d4275edf80..695632dcd17c 100644 --- a/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp +++ b/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp @@ -671,7 +671,7 @@ OGRCARTOTableLayer::CreateGeomField(const OGRGeomFieldDefn *poGeomFieldIn, if (bLaunderColumnNames) { char *pszSafeName = - OGRPGCommonLaunderName(poGeomField->GetNameRef(), "PG"); + OGRPGCommonLaunderName(poGeomField->GetNameRef(), "CARTO", false); poGeomField->SetName(pszSafeName); CPLFree(pszSafeName); } @@ -735,7 +735,8 @@ OGRErr OGRCARTOTableLayer::CreateField(const OGRFieldDefn *poFieldIn, OGRFieldDefn oField(poFieldIn); if (bLaunderColumnNames) { - char *pszName = OGRPGCommonLaunderName(oField.GetNameRef()); + char *pszName = + OGRPGCommonLaunderName(oField.GetNameRef(), "CARTO", false); oField.SetName(pszName); CPLFree(pszName); } diff --git a/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp b/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp index c8d9ec6bb38b..90ab121f2834 100644 --- a/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp +++ b/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp @@ -1271,7 +1271,8 @@ CPLString OGRGMLASLayer::LaunderFieldName(const CPLString &osFieldName) if (m_poDS->GetConf().m_bPGIdentifierLaundering) { - char *pszLaundered = OGRPGCommonLaunderName(osLaunderedName, "GMLAS"); + char *pszLaundered = + 
OGRPGCommonLaunderName(osLaunderedName, "GMLAS", false); osLaunderedName = pszLaundered; CPLFree(pszLaundered); } diff --git a/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp b/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp index 91646c72e4bb..d257548f2098 100644 --- a/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp +++ b/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp @@ -548,7 +548,7 @@ bool GMLASSchemaAnalyzer::LaunderFieldNames(GMLASFeatureClass &oClass) for (size_t i = 0; i < aoFields.size(); i++) { char *pszLaundered = - OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS"); + OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS", false); aoFields[i].SetName(pszLaundered); CPLFree(pszLaundered); } @@ -639,7 +639,7 @@ void GMLASSchemaAnalyzer::LaunderClassNames() for (size_t i = 0; i < aoClasses.size(); i++) { char *pszLaundered = - OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS"); + OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS", false); aoClasses[i]->SetName(pszLaundered); CPLFree(pszLaundered); } diff --git a/ogr/ogrsf_frmts/pg/ogr_pg.h b/ogr/ogrsf_frmts/pg/ogr_pg.h index 40e9e0981388..2bd57b53b475 100644 --- a/ogr/ogrsf_frmts/pg/ogr_pg.h +++ b/ogr/ogrsf_frmts/pg/ogr_pg.h @@ -324,6 +324,7 @@ class OGRPGTableLayer final : public OGRPGLayer CPLString osWHERE{}; int bLaunderColumnNames = true; + bool m_bUTF8ToASCII = false; int bPreservePrecision = true; int bUseCopy = USE_COPY_UNSET; int bCopyActive = false; @@ -465,6 +466,11 @@ class OGRPGTableLayer final : public OGRPGLayer bLaunderColumnNames = bFlag; } + void SetUTF8ToASCIIFlag(bool bFlag) + { + m_bUTF8ToASCII = bFlag; + } + void SetPrecisionFlag(int bFlag) { bPreservePrecision = bFlag; diff --git a/ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp b/ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp index c72621599036..e86973fbfb2a 100644 --- a/ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp +++ b/ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp @@ -1525,7 +1525,10 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char 
*pszLayerName, EndCopy(); - const bool bLaunder = CPLFetchBool(papszOptions, "LAUNDER", true); + const bool bUTF8ToASCII = + CPLFetchBool(papszOptions, "LAUNDER_ASCII", false); + const bool bLaunder = + bUTF8ToASCII || CPLFetchBool(papszOptions, "LAUNDER", true); const char *pszFIDColumnNameIn = CSLFetchNameValue(papszOptions, "FID"); CPLString osFIDColumnName; @@ -1536,7 +1539,7 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName, if (bLaunder) { char *pszLaunderedFid = - OGRPGCommonLaunderName(pszFIDColumnNameIn, "PG"); + OGRPGCommonLaunderName(pszFIDColumnNameIn, "PG", bUTF8ToASCII); osFIDColumnName += pszLaunderedFid; CPLFree(pszLaunderedFid); } @@ -1612,8 +1615,8 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName, pszSchemaName[length] = '\0'; if (bLaunder) - pszTableName = - OGRPGCommonLaunderName(pszDotPos + 1, "PG"); // skip "." + pszTableName = OGRPGCommonLaunderName(pszDotPos + 1, "PG", + bUTF8ToASCII); // skip "." else pszTableName = CPLStrdup(pszDotPos + 1); // skip "." } @@ -1621,8 +1624,8 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName, { pszSchemaName = nullptr; if (bLaunder) - pszTableName = - OGRPGCommonLaunderName(pszLayerName, "PG"); // skip "." + pszTableName = OGRPGCommonLaunderName(pszLayerName, "PG", + bUTF8ToASCII); // skip "." else pszTableName = CPLStrdup(pszLayerName); // skip "." 
} @@ -2024,6 +2027,7 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName, poLayer->SetTableDefinition(osFIDColumnName, pszGFldName, eType, pszGeomType, nSRSId, GeometryTypeFlags); poLayer->SetLaunderFlag(bLaunder); + poLayer->SetUTF8ToASCIIFlag(bUTF8ToASCII); poLayer->SetPrecisionFlag(CPLFetchBool(papszOptions, "PRECISION", true)); // poLayer->SetForcedSRSId(nForcedSRSId); poLayer->SetForcedGeometryTypeFlags(ForcedGeometryTypeFlags); diff --git a/ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp b/ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp index 0670ae6f35fc..f8d2216bf125 100644 --- a/ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp +++ b/ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp @@ -112,6 +112,8 @@ void OGRPGDriverSetCommonMetadata(GDALDriver *poDriver) "default='NO'/>" " " "