Skip to content

Commit

Permalink
PG/PGDUMP: add a LAUNDER_ASCII=YES/NO (default NO) layer creation option
Browse files Browse the repository at this point in the history
```
-  .. lco:: LAUNDER_ASCII
      :choices: YES, NO
      :default: NO
      :since: 3.9

      Implies LAUNDER=YES, with the extra substitution of UTF-8 accented
      characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
      and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
      sets with the closest ASCII letter. Other non-ASCII characters are
      replaced with underscore.
      Consequently this option is not appropriate for non-Latin languages.
```
  • Loading branch information
rouault committed Apr 3, 2024
1 parent 4bb2a50 commit 8fc33cf
Show file tree
Hide file tree
Showing 16 changed files with 197 additions and 38 deletions.
42 changes: 42 additions & 0 deletions autotest/ogr/ogr_pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5913,3 +5913,45 @@ def test_ogr_pg_schema_case_createlayer(pg_ds, tmp_schema):
assert lyr is None
finally:
pg_ds.ExecuteSQL(f'DROP SCHEMA "{tmp_schema_uppercase}" CASCADE')


###############################################################################
# Test LAUNDER=YES


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_YES(pg_ds, tmp_schema):
    """With LAUNDER=YES the accented letter is kept and "#" becomes "_"."""

    e_acute = b"\xC3\xA9".decode("utf-8")

    # Layer (table) name laundering.
    requested_layer_name = f"{tmp_schema}.a" + e_acute + "#"
    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER=YES"])
    assert layer.GetName() == f"{tmp_schema}.a" + e_acute + "_"

    # Field (column) name laundering.
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    first_field = layer.GetLayerDefn().GetFieldDefn(0)
    assert first_field.GetNameRef() == "b" + e_acute + "_"


###############################################################################
# Test LAUNDER=NO


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_NO(pg_ds, tmp_schema):
    """With LAUNDER=NO both the layer name and the field name are kept as given."""

    e_acute = b"\xC3\xA9".decode("utf-8")

    # Layer (table) name must come back unchanged.
    requested_layer_name = f"{tmp_schema}.a" + e_acute + "#"
    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER=NO"])
    assert layer.GetName() == f"{tmp_schema}.a" + e_acute + "#"

    # Field (column) name must come back unchanged.
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    first_field = layer.GetLayerDefn().GetFieldDefn(0)
    assert first_field.GetNameRef() == "b" + e_acute + "#"


###############################################################################
# Test LAUNDER_ASCII


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_ASCII(pg_ds, tmp_schema):
    """With LAUNDER_ASCII=YES the e-acute in layer and field names becomes "e"."""

    e_acute = b"\xC3\xA9".decode("utf-8")

    # Layer (table) name: accented letter replaced by its ASCII counterpart.
    requested_layer_name = f"{tmp_schema}.a" + e_acute
    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER_ASCII=YES"])
    assert layer.GetName() == f"{tmp_schema}.ae"

    # Field (column) name: same substitution.
    layer.CreateField(ogr.FieldDefn("b" + e_acute))
    first_field = layer.GetLayerDefn().GetFieldDefn(0)
    assert first_field.GetNameRef() == "be"
60 changes: 60 additions & 0 deletions autotest/ogr/ogr_pgdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -1580,6 +1580,66 @@ def check_and_remove(needle):
)


###############################################################################
# Test LAUNDER=YES


def test_ogr_pgdump_LAUNDER_YES(tmp_vsimem):
    """With LAUNDER=YES the dumped SQL keeps the accented letter and turns "#" into "_"."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_YES.sql")

    dump_ds = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dump_ds.CreateLayer("a" + e_acute + "#", options=["LAUNDER=YES"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    # Drop the dataset reference before reading the generated file back.
    dump_ds = None

    fp = gdal.VSIFOpenL(sql_path, "rb")
    raw_bytes = gdal.VSIFReadL(1, 10000, fp)
    dumped_sql = raw_bytes.decode("utf8")
    gdal.VSIFCloseL(fp)

    expected_table = '"a' + e_acute + '_"'
    expected_column = '"b' + e_acute + '_"'
    assert expected_table in dumped_sql
    assert expected_column in dumped_sql


###############################################################################
# Test LAUNDER=NO


def test_ogr_pgdump_LAUNDER_NO(tmp_vsimem):
    """With LAUNDER=NO the dumped SQL contains the identifiers exactly as given."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_NO.sql")

    dump_ds = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dump_ds.CreateLayer("a" + e_acute + "#", options=["LAUNDER=NO"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    # Drop the dataset reference before reading the generated file back.
    dump_ds = None

    fp = gdal.VSIFOpenL(sql_path, "rb")
    raw_bytes = gdal.VSIFReadL(1, 10000, fp)
    dumped_sql = raw_bytes.decode("utf8")
    gdal.VSIFCloseL(fp)

    expected_table = '"a' + e_acute + '#"'
    expected_column = '"b' + e_acute + '#"'
    assert expected_table in dumped_sql
    assert expected_column in dumped_sql


###############################################################################
# Test LAUNDER_ASCII


def test_ogr_pgdump_LAUNDER_ASCII(tmp_vsimem):
    """With LAUNDER_ASCII=YES the dumped SQL uses "e" in place of the e-acute."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_ASCII.sql")

    dump_ds = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dump_ds.CreateLayer("a" + e_acute, options=["LAUNDER_ASCII=YES"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute))
    # Drop the dataset reference before reading the generated file back.
    dump_ds = None

    fp = gdal.VSIFOpenL(sql_path, "rb")
    raw_bytes = gdal.VSIFReadL(1, 10000, fp)
    dumped_sql = raw_bytes.decode("utf8")
    gdal.VSIFCloseL(fp)

    assert '"ae"' in dumped_sql
    assert '"be"' in dumped_sql


###############################################################################
# Cleanup

Expand Down
12 changes: 12 additions & 0 deletions doc/source/drivers/vector/pg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,18 @@ Layer Creation Options
some special characters like "-" and "#" to "_". If "NO" exact names
are preserved. If enabled the table (layer) name will also be laundered.

- .. lco:: LAUNDER_ASCII
:choices: YES, NO
:default: NO
:since: 3.9

Implies LAUNDER=YES, with the extra substitution of UTF-8 accented
characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
sets with the closest ASCII letter. Other non-ASCII characters are
replaced with underscore.
Consequently this option is not appropriate for non-Latin languages.

- .. lco:: PRECISION
:choices: YES, NO
:default: YES
Expand Down
15 changes: 13 additions & 2 deletions doc/source/drivers/vector/pgdump.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,19 @@ Layer Creation Options
layer to have their field names "laundered" into a form more
compatible with PostgreSQL. This converts to lower case and converts
some special characters like "-" and "#" to "_". If "NO" exact names
are preserved. If enabled the table
(layer) name will also be laundered.
are preserved. If enabled the table (layer) name will also be laundered.

- .. lco:: LAUNDER_ASCII
:choices: YES, NO
:default: NO
:since: 3.9

Implies LAUNDER=YES, with the extra substitution of UTF-8 accented
characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
sets with the closest ASCII letter. Other non-ASCII characters are
replaced with underscore.
Consequently this option is not appropriate for non-Latin languages.

- .. lco:: PRECISION
:choices: YES, NO
Expand Down
2 changes: 1 addition & 1 deletion ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ OGRCARTODataSource::ICreateLayer(const char *pszNameIn,
CPLString osName(pszNameIn);
if (CPLFetchBool(papszOptions, "LAUNDER", true))
{
char *pszTmp = OGRPGCommonLaunderName(pszNameIn);
char *pszTmp = OGRPGCommonLaunderName(pszNameIn, "CARTO", false);
osName = pszTmp;
CPLFree(pszTmp);
}
Expand Down
5 changes: 3 additions & 2 deletions ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ OGRCARTOTableLayer::CreateGeomField(const OGRGeomFieldDefn *poGeomFieldIn,
if (bLaunderColumnNames)
{
char *pszSafeName =
OGRPGCommonLaunderName(poGeomField->GetNameRef(), "PG");
OGRPGCommonLaunderName(poGeomField->GetNameRef(), "CARTO", false);
poGeomField->SetName(pszSafeName);
CPLFree(pszSafeName);
}
Expand Down Expand Up @@ -735,7 +735,8 @@ OGRErr OGRCARTOTableLayer::CreateField(const OGRFieldDefn *poFieldIn,
OGRFieldDefn oField(poFieldIn);
if (bLaunderColumnNames)
{
char *pszName = OGRPGCommonLaunderName(oField.GetNameRef());
char *pszName =
OGRPGCommonLaunderName(oField.GetNameRef(), "CARTO", false);
oField.SetName(pszName);
CPLFree(pszName);
}
Expand Down
3 changes: 2 additions & 1 deletion ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,8 @@ CPLString OGRGMLASLayer::LaunderFieldName(const CPLString &osFieldName)

if (m_poDS->GetConf().m_bPGIdentifierLaundering)
{
char *pszLaundered = OGRPGCommonLaunderName(osLaunderedName, "GMLAS");
char *pszLaundered =
OGRPGCommonLaunderName(osLaunderedName, "GMLAS", false);
osLaunderedName = pszLaundered;
CPLFree(pszLaundered);
}
Expand Down
4 changes: 2 additions & 2 deletions ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ bool GMLASSchemaAnalyzer::LaunderFieldNames(GMLASFeatureClass &oClass)
for (size_t i = 0; i < aoFields.size(); i++)
{
char *pszLaundered =
OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS");
OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS", false);
aoFields[i].SetName(pszLaundered);
CPLFree(pszLaundered);
}
Expand Down Expand Up @@ -639,7 +639,7 @@ void GMLASSchemaAnalyzer::LaunderClassNames()
for (size_t i = 0; i < aoClasses.size(); i++)
{
char *pszLaundered =
OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS");
OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS", false);
aoClasses[i]->SetName(pszLaundered);
CPLFree(pszLaundered);
}
Expand Down
6 changes: 6 additions & 0 deletions ogr/ogrsf_frmts/pg/ogr_pg.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ class OGRPGTableLayer final : public OGRPGLayer
CPLString osWHERE{};

int bLaunderColumnNames = true;
bool m_bUTF8ToASCII = false;
int bPreservePrecision = true;
int bUseCopy = USE_COPY_UNSET;
int bCopyActive = false;
Expand Down Expand Up @@ -465,6 +466,11 @@ class OGRPGTableLayer final : public OGRPGLayer
bLaunderColumnNames = bFlag;
}

// Store the UTF-8-to-ASCII laundering flag. The stored value is later
// forwarded as the bUTF8ToASCII argument of OGRPGCommonLaunderName() when
// field and geometry field names are laundered; the data source sets it
// from the LAUNDER_ASCII layer creation option.
void SetUTF8ToASCIIFlag(bool bFlag)
{
m_bUTF8ToASCII = bFlag;
}

void SetPrecisionFlag(int bFlag)
{
bPreservePrecision = bFlag;
Expand Down
16 changes: 10 additions & 6 deletions ogr/ogrsf_frmts/pg/ogrpgdatasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1525,7 +1525,10 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName,

EndCopy();

const bool bLaunder = CPLFetchBool(papszOptions, "LAUNDER", true);
const bool bUTF8ToASCII =
CPLFetchBool(papszOptions, "LAUNDER_ASCII", false);
const bool bLaunder =
bUTF8ToASCII || CPLFetchBool(papszOptions, "LAUNDER", true);

const char *pszFIDColumnNameIn = CSLFetchNameValue(papszOptions, "FID");
CPLString osFIDColumnName;
Expand All @@ -1536,7 +1539,7 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName,
if (bLaunder)
{
char *pszLaunderedFid =
OGRPGCommonLaunderName(pszFIDColumnNameIn, "PG");
OGRPGCommonLaunderName(pszFIDColumnNameIn, "PG", bUTF8ToASCII);
osFIDColumnName += pszLaunderedFid;
CPLFree(pszLaunderedFid);
}
Expand Down Expand Up @@ -1612,17 +1615,17 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName,
pszSchemaName[length] = '\0';

if (bLaunder)
pszTableName =
OGRPGCommonLaunderName(pszDotPos + 1, "PG"); // skip "."
pszTableName = OGRPGCommonLaunderName(pszDotPos + 1, "PG",
bUTF8ToASCII); // skip "."
else
pszTableName = CPLStrdup(pszDotPos + 1); // skip "."
}
else
{
pszSchemaName = nullptr;
if (bLaunder)
pszTableName =
OGRPGCommonLaunderName(pszLayerName, "PG"); // skip "."
pszTableName = OGRPGCommonLaunderName(pszLayerName, "PG",
bUTF8ToASCII); // skip "."
else
pszTableName = CPLStrdup(pszLayerName); // skip "."
}
Expand Down Expand Up @@ -2024,6 +2027,7 @@ OGRLayer *OGRPGDataSource::ICreateLayer(const char *pszLayerName,
poLayer->SetTableDefinition(osFIDColumnName, pszGFldName, eType,
pszGeomType, nSRSId, GeometryTypeFlags);
poLayer->SetLaunderFlag(bLaunder);
poLayer->SetUTF8ToASCIIFlag(bUTF8ToASCII);
poLayer->SetPrecisionFlag(CPLFetchBool(papszOptions, "PRECISION", true));
// poLayer->SetForcedSRSId(nForcedSRSId);
poLayer->SetForcedGeometryTypeFlags(ForcedGeometryTypeFlags);
Expand Down
2 changes: 2 additions & 0 deletions ogr/ogrsf_frmts/pg/ogrpgdrivercore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ void OGRPGDriverSetCommonMetadata(GDALDriver *poDriver)
"default='NO'/>"
" <Option name='LAUNDER' type='boolean' description='Whether layer "
"and field names will be laundered' default='YES'/>"
" <Option name='LAUNDER_ASCII' type='boolean' description='Same as "
"LAUNDER, but force generation of ASCII identifiers' default='NO'/>"
" <Option name='PRECISION' type='boolean' description='Whether fields "
"created should keep the width and precision' default='YES'/>"
" <Option name='DIM' type='string' description='Set to 2 to force the "
Expand Down
11 changes: 6 additions & 5 deletions ogr/ogrsf_frmts/pg/ogrpgtablelayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2371,7 +2371,8 @@ OGRErr OGRPGTableLayer::CreateField(const OGRFieldDefn *poFieldIn,
/* -------------------------------------------------------------------- */
if (bLaunderColumnNames)
{
char *pszSafeName = OGRPGCommonLaunderName(oField.GetNameRef(), "PG");
char *pszSafeName =
OGRPGCommonLaunderName(oField.GetNameRef(), "PG", m_bUTF8ToASCII);

oField.SetName(pszSafeName);
CPLFree(pszSafeName);
Expand Down Expand Up @@ -2635,8 +2636,8 @@ OGRErr OGRPGTableLayer::CreateGeomField(const OGRGeomFieldDefn *poGeomFieldIn,
/* -------------------------------------------------------------------- */
if (bLaunderColumnNames)
{
char *pszSafeName =
OGRPGCommonLaunderName(poGeomField->GetNameRef(), "PG");
char *pszSafeName = OGRPGCommonLaunderName(poGeomField->GetNameRef(),
"PG", m_bUTF8ToASCII);

poGeomField->SetName(pszSafeName);
CPLFree(pszSafeName);
Expand Down Expand Up @@ -2962,8 +2963,8 @@ OGRErr OGRPGTableLayer::AlterFieldDefn(int iField, OGRFieldDefn *poNewFieldDefn,
{
if (bLaunderColumnNames)
{
char *pszSafeName =
OGRPGCommonLaunderName(oField.GetNameRef(), "PG");
char *pszSafeName = OGRPGCommonLaunderName(oField.GetNameRef(),
"PG", m_bUTF8ToASCII);
oField.SetName(pszSafeName);
CPLFree(pszSafeName);
}
Expand Down
9 changes: 8 additions & 1 deletion ogr/ogrsf_frmts/pgdump/ogr_pgdump.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ void CPL_DLL OGRPGCommonAppendFieldValue(
OGRPGCommonEscapeStringCbk pfnEscapeString, void *userdata);

char CPL_DLL *OGRPGCommonLaunderName(const char *pszSrcName,
const char *pszDebugPrefix = "OGR");
const char *pszDebugPrefix,
bool bUTF8ToASCII);

/************************************************************************/
/* OGRPGDumpGeomFieldDefn */
Expand Down Expand Up @@ -110,6 +111,7 @@ class OGRPGDumpLayer final : public OGRLayer
OGRFeatureDefn *m_poFeatureDefn = nullptr;
OGRPGDumpDataSource *m_poDS = nullptr;
bool m_bLaunderColumnNames = true;
bool m_bUTF8ToASCII = false;
bool m_bPreservePrecision = true;
int m_bUseCopy = USE_COPY_UNSET;
bool m_bWriteAsHex = false;
Expand Down Expand Up @@ -191,6 +193,11 @@ class OGRPGDumpLayer final : public OGRLayer
m_bLaunderColumnNames = bFlag;
}

// Store the UTF-8-to-ASCII laundering flag in m_bUTF8ToASCII.
// NOTE(review): presumably set from the LAUNDER_ASCII layer creation option
// and consumed when laundering names - the call sites are not visible here,
// confirm against the PGDump data source / layer implementation.
void SetUTF8ToASCIIFlag(bool bFlag)
{
m_bUTF8ToASCII = bFlag;
}

void SetPrecisionFlag(bool bFlag)
{
m_bPreservePrecision = bFlag;
Expand Down
Loading

0 comments on commit 8fc33cf

Please sign in to comment.