Skip to content

Commit

Permalink
Merge pull request OSGeo#9609 from rouault/gpkg_launder
Browse files Browse the repository at this point in the history
GeoPackage: add a LAUNDER=YES/NO layer creation option ; PG/PGDUMP: add a LAUNDER_ASCII=YES/NO (default NO) layer creation option
  • Loading branch information
rouault authored Apr 14, 2024
2 parents f5d64e8 + 8039763 commit 5ba8582
Show file tree
Hide file tree
Showing 58 changed files with 1,316 additions and 135 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ repos:
frmts/pcidsk/sdk|
frmts/grib/degrib/degrib|
frmts/grib/degrib/g2clib|
ogr/utf8.h|
port/utf8.h|
ogr/ogrsf_frmts/cad/libopencad/|
ogr/ogrsf_frmts/geojson/libjson/|
ogr/ogrsf_frmts/flatgeobuf/flatbuffers/|
Expand Down
2 changes: 1 addition & 1 deletion apps/gdaltindex_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ static bool PatternMatch(const char *input, const char *pattern)
}
else
{
if (tolower(*pattern) != tolower(*input))
if (CPLTolower(*pattern) != CPLTolower(*input))
{
return false;
}
Expand Down
1 change: 1 addition & 0 deletions autotest/cpp/data/utf8accents.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢvŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽž
1 change: 1 addition & 0 deletions autotest/cpp/data/utf8accents_ascii.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
AAAAAAAECEEEEIIIINOOOOOUUUUYSSaaaaaaaeceeeeiiiinoooooouuuuyuAaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIiIJijJjKkkLlLlLlLlLlNnNnNnOoOoOoOEoeRrRrRrSsSsSsSsTvTtTtUuUuUuUuUuUuWwYyYZzZzZz
75 changes: 75 additions & 0 deletions autotest/cpp/test_cpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5129,4 +5129,79 @@ TEST_F(test_cpl, CPLStrtod)
}
}

// Checks CPLForceToASCII() against a small table of inputs: plain ASCII text,
// an explicit length of 1, and a string ending with the non-ASCII byte 0xFF.
// '_' is passed as the replacement character in every case.
TEST_F(test_cpl, CPLForceToASCII)
{
    struct Case
    {
        const char *pszInput;     // string handed to CPLForceToASCII()
        int nLength;              // length argument handed to CPLForceToASCII()
        const char *pszExpected;  // expected returned string
    };

    const Case asCases[] = {
        {"foo", -1, "foo"},
        {"foo", 1, "f"},
        {"foo\xFF", -1, "foo_"},
    };

    for (const Case &sCase : asCases)
    {
        // The returned buffer is released with CPLFree() once checked.
        char *pszResult = CPLForceToASCII(sCase.pszInput, sCase.nLength, '_');
        EXPECT_STREQ(pszResult, sCase.pszExpected);
        CPLFree(pszResult);
    }
}

// Checks CPLUTF8ForceToASCII() on short literal inputs, then on the
// utf8accents.txt data file, whose expected transliteration is stored in
// utf8accents_ascii.txt. '_' is passed as the replacement character.
TEST_F(test_cpl, CPLUTF8ForceToASCII)
{
    // Reads the whole file into osContent and drops trailing '\n' characters.
    // Returns false when the file cannot be opened or is only partially read,
    // so that a short read cannot silently compare partially-filled buffers.
    const auto ReadFileWithoutTrailingNewlines =
        [](const std::string &osFilename, std::string &osContent) -> bool
    {
        VSILFILE *f = VSIFOpenL(osFilename.c_str(), "rb");
        if (f == nullptr)
            return false;
        VSIFSeekL(f, 0, SEEK_END);
        osContent.resize(static_cast<size_t>(VSIFTellL(f)));
        VSIFSeekL(f, 0, SEEK_SET);
        const size_t nRead = VSIFReadL(&osContent[0], 1, osContent.size(), f);
        // Close before evaluating the read so that no handle is leaked on
        // failure.
        VSIFCloseL(f);
        if (nRead != osContent.size())
            return false;
        while (!osContent.empty() && osContent.back() == '\n')
            osContent.pop_back();
        return true;
    };

    {
        // Pure ASCII input is returned unchanged.
        char *pszOut = CPLUTF8ForceToASCII("foo", '_');
        EXPECT_STREQ(pszOut, "foo");
        CPLFree(pszOut);
    }
    {
        // Truncated UTF-8 character
        char *pszOut = CPLUTF8ForceToASCII("foo\xC0", '_');
        EXPECT_STREQ(pszOut, "foo");
        CPLFree(pszOut);
    }
    {
        // The two-byte sequence C2 80 is expected to yield the replacement
        // character.
        char *pszOut = CPLUTF8ForceToASCII("foo\xc2\x80", '_');
        EXPECT_STREQ(pszOut, "foo_");
        CPLFree(pszOut);
    }
    {
        // Same sequence followed by an ASCII character: the sequence is
        // replaced and the trailing 'x' is kept.
        char *pszOut = CPLUTF8ForceToASCII("foo\xc2\x80x", '_');
        EXPECT_STREQ(pszOut, "foo_x");
        CPLFree(pszOut);
    }
    {
        // Accented characters from the data file must map to the reference
        // ASCII string.
        std::string s;
        ASSERT_TRUE(
            ReadFileWithoutTrailingNewlines(data_ + SEP + "utf8accents.txt", s));
        std::string sRef;
        ASSERT_TRUE(ReadFileWithoutTrailingNewlines(
            data_ + SEP + "utf8accents_ascii.txt", sRef));

        char *pszOut = CPLUTF8ForceToASCII(s.c_str(), '_');
        EXPECT_STREQ(pszOut, sRef.c_str());
        CPLFree(pszOut);
    }
}

} // namespace
20 changes: 20 additions & 0 deletions autotest/ogr/ogr_gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10489,3 +10489,23 @@ def test_ogr_gpkg_ST_Area_on_ellipsoid(tmp_vsimem):
) as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f[0] is None


###############################################################################
# Test LAUNDER=YES layer creation option


@gdaltest.enable_exceptions()
def test_ogr_gpkg_launder(tmp_vsimem):
    """Create a GPKG layer with LAUNDER=YES and check the resulting layer,
    FID column, geometry column and field names."""

    gpkg_path = tmp_vsimem / "test_ogr_gpkg_launder.gpkg"
    creation_options = ["FID=MY_FID", "GEOMETRY_NAME=MY_GEOM", "LAUNDER=YES"]

    dataset = ogr.GetDriverByName("GPKG").CreateDataSource(gpkg_path)
    layer = dataset.CreateLayer("az+AZ09_", options=creation_options)

    # Upper case is expected to be lowered and "+" turned into "_".
    assert layer.GetName() == "az_az09_"
    assert layer.GetFIDColumn() == "my_fid"
    assert layer.GetGeometryColumn() == "my_geom"

    # A field named "_" is expected to come back as "x_".
    layer.CreateField(ogr.FieldDefn("_"))
    assert layer.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "x_"
42 changes: 42 additions & 0 deletions autotest/ogr/ogr_pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5911,3 +5911,45 @@ def test_ogr_pg_schema_case_createlayer(pg_ds, tmp_schema):
pg_ds.CreateLayer(f"{tmp_schema_mixedcase}.yet_another_layer")
finally:
pg_ds.ExecuteSQL(f'DROP SCHEMA "{tmp_schema_uppercase}" CASCADE')


###############################################################################
# Test LAUNDER=YES


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_YES(pg_ds, tmp_schema):
    """With LAUNDER=YES, "#" is expected to become "_" in layer and field
    names while the accented character is kept."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    requested_layer_name = f"{tmp_schema}.a" + e_acute + "#"

    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER=YES"])
    assert layer.GetName() == f"{tmp_schema}.a" + e_acute + "_"

    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    assert layer.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + e_acute + "_"


###############################################################################
# Test LAUNDER=NO


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_NO(pg_ds, tmp_schema):
    """With LAUNDER=NO, layer and field names are expected to be preserved
    exactly, including "#" and the accented character."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    requested_layer_name = f"{tmp_schema}.a" + e_acute + "#"

    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER=NO"])
    assert layer.GetName() == f"{tmp_schema}.a" + e_acute + "#"

    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    assert layer.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + e_acute + "#"


###############################################################################
# Test LAUNDER_ASCII


@gdaltest.enable_exceptions()
def test_ogr_pg_LAUNDER_ASCII(pg_ds, tmp_schema):
    """With LAUNDER_ASCII=YES, the accented "e" is expected to be replaced by
    a plain "e" in layer and field names."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    requested_layer_name = f"{tmp_schema}.a" + e_acute

    layer = pg_ds.CreateLayer(requested_layer_name, options=["LAUNDER_ASCII=YES"])
    assert layer.GetName() == f"{tmp_schema}.ae"

    layer.CreateField(ogr.FieldDefn("b" + e_acute))
    assert layer.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "be"
60 changes: 60 additions & 0 deletions autotest/ogr/ogr_pgdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -1580,6 +1580,66 @@ def check_and_remove(needle):
)


###############################################################################
# Test LAUNDER=YES


def test_ogr_pgdump_LAUNDER_YES(tmp_vsimem):
    """With LAUNDER=YES, the generated SQL is expected to quote identifiers
    where "#" became "_" and the accented character is kept."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_YES.sql")

    dataset = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dataset.CreateLayer("a" + e_acute + "#", options=["LAUNDER=YES"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    # Drop the dataset reference before reading the output back.
    dataset = None

    sql_file = gdal.VSIFOpenL(sql_path, "rb")
    sql_text = gdal.VSIFReadL(1, 10000, sql_file).decode("utf8")
    gdal.VSIFCloseL(sql_file)

    for quoted_identifier in ('"a' + e_acute + '_"', '"b' + e_acute + '_"'):
        assert quoted_identifier in sql_text


###############################################################################
# Test LAUNDER=NO


def test_ogr_pgdump_LAUNDER_NO(tmp_vsimem):
    """With LAUNDER=NO, the generated SQL is expected to contain the layer and
    field names unchanged, including "#" and the accented character."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_NO.sql")

    dataset = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dataset.CreateLayer("a" + e_acute + "#", options=["LAUNDER=NO"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute + "#"))
    # Drop the dataset reference before reading the output back.
    dataset = None

    sql_file = gdal.VSIFOpenL(sql_path, "rb")
    sql_text = gdal.VSIFReadL(1, 10000, sql_file).decode("utf8")
    gdal.VSIFCloseL(sql_file)

    for quoted_identifier in ('"a' + e_acute + '#"', '"b' + e_acute + '#"'):
        assert quoted_identifier in sql_text


###############################################################################
# Test LAUNDER_ASCII


def test_ogr_pgdump_LAUNDER_ASCII(tmp_vsimem):
    """With LAUNDER_ASCII=YES, the generated SQL is expected to contain
    identifiers where the accented "e" became a plain "e"."""

    e_acute = b"\xC3\xA9".decode("utf-8")
    sql_path = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_ASCII.sql")

    dataset = ogr.GetDriverByName("PGDump").CreateDataSource(sql_path)
    layer = dataset.CreateLayer("a" + e_acute, options=["LAUNDER_ASCII=YES"])
    layer.CreateField(ogr.FieldDefn("b" + e_acute))
    # Drop the dataset reference before reading the output back.
    dataset = None

    sql_file = gdal.VSIFOpenL(sql_path, "rb")
    sql_text = gdal.VSIFReadL(1, 10000, sql_file).decode("utf8")
    gdal.VSIFCloseL(sql_file)

    for quoted_identifier in ('"ae"', '"be"'):
        assert quoted_identifier in sql_text


###############################################################################
# Cleanup

Expand Down
15 changes: 15 additions & 0 deletions doc/source/drivers/vector/gpkg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,21 @@ Layer creation options

The following layer creation options are available:

- .. lco:: LAUNDER
:choices: YES, NO
:default: NO
:since: 3.9

Whether layer and field names will be laundered. Laundering makes sure
that the recommendation of https://www.geopackage.org/guidance/getting-started.html
is followed: an identifier should start with a lowercase character and
only use lowercase characters, numbers 0-9, and underscores (_). UTF-8
accented characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
sets are replaced when possible with the closest ASCII letter.
Characters that do not match the recommendation are replaced with underscore.
Consequently this option is not appropriate for non-Latin languages.

- .. lco:: GEOMETRY_NAME
:default: geom

Expand Down
12 changes: 12 additions & 0 deletions doc/source/drivers/vector/pg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,18 @@ Layer Creation Options
some special characters like "-" and "#" to "_". If "NO" exact names
are preserved. If enabled the table (layer) name will also be laundered.

- .. lco:: LAUNDER_ASCII
:choices: YES, NO
:default: NO
:since: 3.9

Implies LAUNDER=YES, with the extra substitution of UTF-8 accented
characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
sets with the closest ASCII letter. Other non-ASCII characters are
replaced with underscore.
Consequently this option is not appropriate for non-Latin languages.

- .. lco:: PRECISION
:choices: YES, NO
:default: YES
Expand Down
15 changes: 13 additions & 2 deletions doc/source/drivers/vector/pgdump.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,19 @@ Layer Creation Options
layer to have their field names "laundered" into a form more
compatible with PostgreSQL. This converts to lower case and converts
some special characters like "-" and "#" to "_". If "NO" exact names
are preserved. If enabled the table
(layer) name will also be laundered.
are preserved. If enabled the table (layer) name will also be laundered.

- .. lco:: LAUNDER_ASCII
:choices: YES, NO
:default: NO
:since: 3.9

Implies LAUNDER=YES, with the extra substitution of UTF-8 accented
characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__
and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__
sets with the closest ASCII letter. Other non-ASCII characters are
replaced with underscore.
Consequently this option is not appropriate for non-Latin languages.

- .. lco:: PRECISION
:choices: YES, NO
Expand Down
4 changes: 2 additions & 2 deletions frmts/netcdf/netcdfdataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3425,14 +3425,14 @@ void netCDFDataset::SetProjectionFromVar(
for (unsigned int i = 0;
i < strlen(poDS->papszDimName[poDS->nXDimID]) && i < 3; i++)
{
szDimNameX[i] = (char)tolower(static_cast<unsigned char>(
szDimNameX[i] = (char)CPLTolower(static_cast<unsigned char>(
(poDS->papszDimName[poDS->nXDimID])[i]));
}
szDimNameX[3] = '\0';
// for( unsigned int i = 0;
// (i < strlen(poDS->papszDimName[poDS->nYDimID])
// && i < 3 ); i++ ) {
// szDimNameY[i]=(char)tolower(static_cast<unsigned char>((poDS->papszDimName[poDS->nYDimID])[i]));
// szDimNameY[i]=(char)CPLTolower(static_cast<unsigned char>((poDS->papszDimName[poDS->nYDimID])[i]));
// }
// szDimNameY[3] = '\0';
}
Expand Down
21 changes: 11 additions & 10 deletions frmts/pcidsk/sdk/core/pcidsk_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@
#include <cstdarg>
#include <iostream>

// Forward declarations of the CPL case-conversion helpers, with C linkage.
// CPLToupper() is called by the case-folding routines of this file in place
// of the C library islower()/toupper() pair.
// NOTE(review): declared here rather than pulled in through a CPL header --
// presumably to keep this SDK source free of GDAL includes; confirm.
extern "C"
{
int CPL_DLL CPLToupper(int c);
int CPL_DLL CPLTolower(int c);
}

#if !defined(va_copy) && defined(__va_copy)
#define va_copy __va_copy
#endif
Expand Down Expand Up @@ -104,8 +110,7 @@ std::string &PCIDSK::UCaseStr( std::string &target )
{
for( unsigned int i = 0; i < target.size(); i++ )
{
if( islower(static_cast<unsigned char>(target[i])) )
target[i] = (char) toupper(static_cast<unsigned char>(target[i]));
target[i] = (char) CPLToupper(static_cast<unsigned char>(target[i]));
}

return target;
Expand Down Expand Up @@ -409,10 +414,8 @@ int PCIDSK::pci_strcasecmp( const char *string1, const char *string2 )
char c1 = string1[i];
char c2 = string2[i];

if( islower(static_cast<unsigned char>(c1)) )
c1 = (char) toupper(static_cast<unsigned char>(c1));
if( islower(static_cast<unsigned char>(c2)) )
c2 = (char) toupper(static_cast<unsigned char>(c2));
c1 = (char) CPLToupper(static_cast<unsigned char>(c1));
c2 = (char) CPLToupper(static_cast<unsigned char>(c2));

if( c1 < c2 )
return -1;
Expand Down Expand Up @@ -447,10 +450,8 @@ int PCIDSK::pci_strncasecmp( const char *string1, const char *string2, size_t le
char c1 = string1[i];
char c2 = string2[i];

if( islower(static_cast<unsigned char>(c1)) )
c1 = (char) toupper(static_cast<unsigned char>(c1));
if( islower(static_cast<unsigned char>(c2)) )
c2 = (char) toupper(static_cast<unsigned char>(c2));
c1 = (char) CPLToupper(static_cast<unsigned char>(c1));
c2 = (char) CPLToupper(static_cast<unsigned char>(c2));

if( c1 < c2 )
return -1;
Expand Down
2 changes: 1 addition & 1 deletion frmts/pds/pds4dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ static CPLString FixupTableFilename(const CPLString &osFilename)
if (!osExt.empty())
{
CPLString osTry(osFilename);
if (islower(static_cast<unsigned char>(osExt[0])))
if (osExt[0] >= 'a' && osExt[0] <= 'z')
{
osTry = CPLResetExtension(osFilename, osExt.toupper());
}
Expand Down
2 changes: 1 addition & 1 deletion gcore/gdal_mdreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1031,7 +1031,7 @@ static bool GDAL_IMD_AA2R(char ***ppapszIMD)
CPLString osLine;
osTarget.Printf(
"IMAGE_1.%c%s",
tolower(static_cast<unsigned char>(keylist[iKey][0])),
CPLTolower(static_cast<unsigned char>(keylist[iKey][0])),
keylist[iKey] + 1);

osLine = osTarget + "=" + osValue;
Expand Down
4 changes: 2 additions & 2 deletions gcore/gdal_misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2316,9 +2316,9 @@ int GDALReadWorldFile2(const char *pszBaseFilename, const char *pszExtension,
for (int i = 0; szExtUpper[i] != '\0'; i++)
{
szExtUpper[i] = static_cast<char>(
toupper(static_cast<unsigned char>(szExtUpper[i])));
CPLToupper(static_cast<unsigned char>(szExtUpper[i])));
szExtLower[i] = static_cast<char>(
tolower(static_cast<unsigned char>(szExtLower[i])));
CPLTolower(static_cast<unsigned char>(szExtLower[i])));
}

const char *pszTFW = CPLResetExtension(pszBaseFilename, szExtLower);
Expand Down
Loading

0 comments on commit 5ba8582

Please sign in to comment.