diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ae512f085cc8..df38d94bc0bd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: frmts/pcidsk/sdk| frmts/grib/degrib/degrib| frmts/grib/degrib/g2clib| - ogr/utf8.h| + port/utf8.h| ogr/ogrsf_frmts/cad/libopencad/| ogr/ogrsf_frmts/geojson/libjson/| ogr/ogrsf_frmts/flatgeobuf/flatbuffers/| diff --git a/apps/gdaltindex_lib.cpp b/apps/gdaltindex_lib.cpp index 68c15081642c..8bec93493ed2 100644 --- a/apps/gdaltindex_lib.cpp +++ b/apps/gdaltindex_lib.cpp @@ -147,7 +147,7 @@ static bool PatternMatch(const char *input, const char *pattern) } else { - if (tolower(*pattern) != tolower(*input)) + if (CPLTolower(*pattern) != CPLTolower(*input)) { return false; } diff --git a/autotest/cpp/data/utf8accents.txt b/autotest/cpp/data/utf8accents.txt new file mode 100644 index 000000000000..a0c35d225c7a --- /dev/null +++ b/autotest/cpp/data/utf8accents.txt @@ -0,0 +1 @@ +ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢvŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽž diff --git a/autotest/cpp/data/utf8accents_ascii.txt b/autotest/cpp/data/utf8accents_ascii.txt new file mode 100644 index 000000000000..4444beb5ac42 --- /dev/null +++ b/autotest/cpp/data/utf8accents_ascii.txt @@ -0,0 +1 @@ +AAAAAAAECEEEEIIIINOOOOOUUUUYSSaaaaaaaeceeeeiiiinoooooouuuuyuAaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIiIJijJjKkkLlLlLlLlLlNnNnNnOoOoOoOEoeRrRrRrSsSsSsSsTvTtTtUuUuUuUuUuUuWwYyYZzZzZz diff --git a/autotest/cpp/test_cpl.cpp b/autotest/cpp/test_cpl.cpp index 6dbeabda9806..c96777518efb 100644 --- a/autotest/cpp/test_cpl.cpp +++ b/autotest/cpp/test_cpl.cpp @@ -5129,4 +5129,79 @@ TEST_F(test_cpl, CPLStrtod) } } +TEST_F(test_cpl, CPLForceToASCII) +{ + { + char *pszOut = CPLForceToASCII("foo", -1, '_'); + EXPECT_STREQ(pszOut, "foo"); + CPLFree(pszOut); + } + { + char *pszOut = CPLForceToASCII("foo", 1, '_'); + 
EXPECT_STREQ(pszOut, "f"); + CPLFree(pszOut); + } + { + char *pszOut = CPLForceToASCII("foo\xFF", -1, '_'); + EXPECT_STREQ(pszOut, "foo_"); + CPLFree(pszOut); + } +} + +TEST_F(test_cpl, CPLUTF8ForceToASCII) +{ + { + char *pszOut = CPLUTF8ForceToASCII("foo", '_'); + EXPECT_STREQ(pszOut, "foo"); + CPLFree(pszOut); + } + { + // Truncated UTF-8 character + char *pszOut = CPLUTF8ForceToASCII("foo\xC0", '_'); + EXPECT_STREQ(pszOut, "foo"); + CPLFree(pszOut); + } + { + char *pszOut = CPLUTF8ForceToASCII("foo\xc2\x80", '_'); + EXPECT_STREQ(pszOut, "foo_"); + CPLFree(pszOut); + } + { + char *pszOut = CPLUTF8ForceToASCII("foo\xc2\x80x", '_'); + EXPECT_STREQ(pszOut, "foo_x"); + CPLFree(pszOut); + } + { + std::string s; + { + VSILFILE *f = + VSIFOpenL((data_ + SEP + "utf8accents.txt").c_str(), "rb"); + ASSERT_NE(f, nullptr); + VSIFSeekL(f, 0, SEEK_END); + s.resize(static_cast(VSIFTellL(f))); + VSIFSeekL(f, 0, SEEK_SET); + VSIFReadL(&s[0], 1, s.size(), f); + VSIFCloseL(f); + while (!s.empty() && s.back() == '\n') + s.pop_back(); + } + std::string sRef; + { + VSILFILE *f = VSIFOpenL( + (data_ + SEP + "utf8accents_ascii.txt").c_str(), "rb"); + ASSERT_NE(f, nullptr); + VSIFSeekL(f, 0, SEEK_END); + sRef.resize(static_cast(VSIFTellL(f))); + VSIFSeekL(f, 0, SEEK_SET); + VSIFReadL(&sRef[0], 1, sRef.size(), f); + VSIFCloseL(f); + while (!sRef.empty() && sRef.back() == '\n') + sRef.pop_back(); + } + char *pszOut = CPLUTF8ForceToASCII(s.c_str(), '_'); + EXPECT_STREQ(pszOut, sRef.c_str()); + CPLFree(pszOut); + } +} + } // namespace diff --git a/autotest/ogr/ogr_gpkg.py b/autotest/ogr/ogr_gpkg.py index 394ca882730c..28ef09e51411 100755 --- a/autotest/ogr/ogr_gpkg.py +++ b/autotest/ogr/ogr_gpkg.py @@ -10489,3 +10489,23 @@ def test_ogr_gpkg_ST_Area_on_ellipsoid(tmp_vsimem): ) as sql_lyr: f = sql_lyr.GetNextFeature() assert f[0] is None + + +############################################################################### +# Test LAUNDER=YES layer creation option + + 
+@gdaltest.enable_exceptions() +def test_ogr_gpkg_launder(tmp_vsimem): + + tmpfilename = tmp_vsimem / "test_ogr_gpkg_launder.gpkg" + + ds = ogr.GetDriverByName("GPKG").CreateDataSource(tmpfilename) + lyr = ds.CreateLayer( + "az+AZ09_", options=["FID=MY_FID", "GEOMETRY_NAME=MY_GEOM", "LAUNDER=YES"] + ) + assert lyr.GetName() == "az_az09_" + assert lyr.GetFIDColumn() == "my_fid" + assert lyr.GetGeometryColumn() == "my_geom" + lyr.CreateField(ogr.FieldDefn("_")) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "x_" diff --git a/autotest/ogr/ogr_pg.py b/autotest/ogr/ogr_pg.py index 0e904fd8b2f1..1845381d3b76 100755 --- a/autotest/ogr/ogr_pg.py +++ b/autotest/ogr/ogr_pg.py @@ -5911,3 +5911,45 @@ def test_ogr_pg_schema_case_createlayer(pg_ds, tmp_schema): pg_ds.CreateLayer(f"{tmp_schema_mixedcase}.yet_another_layer") finally: pg_ds.ExecuteSQL(f'DROP SCHEMA "{tmp_schema_uppercase}" CASCADE') + + +############################################################################### +# Test LAUNDER=YES + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_YES(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute + "#", options=["LAUNDER=YES"]) + assert lyr.GetName() == f"{tmp_schema}.a" + eacute + "_" + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + eacute + "_" + + +############################################################################### +# Test LAUNDER=NO + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_NO(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute + "#", options=["LAUNDER=NO"]) + assert lyr.GetName() == f"{tmp_schema}.a" + eacute + "#" + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "b" + eacute + "#" + + 
+############################################################################### +# Test LAUNDER_ASCII + + +@gdaltest.enable_exceptions() +def test_ogr_pg_LAUNDER_ASCII(pg_ds, tmp_schema): + + eacute = b"\xC3\xA9".decode("utf-8") + lyr = pg_ds.CreateLayer(f"{tmp_schema}.a" + eacute, options=["LAUNDER_ASCII=YES"]) + assert lyr.GetName() == f"{tmp_schema}.ae" + lyr.CreateField(ogr.FieldDefn("b" + eacute)) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetNameRef() == "be" diff --git a/autotest/ogr/ogr_pgdump.py b/autotest/ogr/ogr_pgdump.py index ec92a8491889..fddb6cf7bfe5 100755 --- a/autotest/ogr/ogr_pgdump.py +++ b/autotest/ogr/ogr_pgdump.py @@ -1580,6 +1580,66 @@ def check_and_remove(needle): ) +############################################################################### +# Test LAUNDER=YES + + +def test_ogr_pgdump_LAUNDER_YES(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_YES.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute + "#", options=["LAUNDER=YES"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"a' + eacute + '_"' in sql + assert '"b' + eacute + '_"' in sql + + +############################################################################### +# Test LAUNDER=NO + + +def test_ogr_pgdump_LAUNDER_NO(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_NO.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute + "#", options=["LAUNDER=NO"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute + "#")) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"a' + eacute + '#"' in sql + assert '"b' + eacute + '#"' in sql + + 
+############################################################################### +# Test LAUNDER_ASCII + + +def test_ogr_pgdump_LAUNDER_ASCII(tmp_vsimem): + + eacute = b"\xC3\xA9".decode("utf-8") + filename = str(tmp_vsimem / "test_ogr_pgdump_LAUNDER_ASCII.sql") + ds = ogr.GetDriverByName("PGDump").CreateDataSource(filename) + lyr = ds.CreateLayer("a" + eacute, options=["LAUNDER_ASCII=YES"]) + lyr.CreateField(ogr.FieldDefn("b" + eacute)) + ds = None + + f = gdal.VSIFOpenL(filename, "rb") + sql = gdal.VSIFReadL(1, 10000, f).decode("utf8") + gdal.VSIFCloseL(f) + assert '"ae"' in sql + assert '"be"' in sql + + ############################################################################### # Cleanup diff --git a/doc/source/drivers/vector/gpkg.rst b/doc/source/drivers/vector/gpkg.rst index cf33fb484284..319278304ad7 100644 --- a/doc/source/drivers/vector/gpkg.rst +++ b/doc/source/drivers/vector/gpkg.rst @@ -342,6 +342,21 @@ Layer creation options The following layer creation options are available: +- .. lco:: LAUNDER + :choices: YES, NO + :default: NO + :since: 3.9 + + Whether layer and field names will be laundered. Laundering makes sure + that the recommendation of https://www.geopackage.org/guidance/getting-started.html + is followed: an identifier should start with a lowercase character and + only use lowercase characters, numbers 0-9, and underscores (_). UTF-8 + accented characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__ + and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__ + sets are replaced when possible with the closest ASCII letter. + Characters that do not match the recommendation are replaced with underscore. + Consequently this option is not appropriate for non-Latin languages. + - .. 
lco:: GEOMETRY_NAME :default: geom diff --git a/doc/source/drivers/vector/pg.rst b/doc/source/drivers/vector/pg.rst index 0a9e1c2749c0..2e5e26c5e7c8 100644 --- a/doc/source/drivers/vector/pg.rst +++ b/doc/source/drivers/vector/pg.rst @@ -268,6 +268,18 @@ Layer Creation Options some special characters like "-" and "#" to "_". If "NO" exact names are preserved. If enabled the table (layer) name will also be laundered. +- .. lco:: LAUNDER_ASCII + :choices: YES, NO + :default: NO + :since: 3.9 + + Implies LAUNDER=YES, with the extra substitution of UTF-8 accented + characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__ + and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__ + sets with the closest ASCII letter. Other non-ASCII characters are + replaced with underscore. + Consequently this option is not appropriate for non-Latin languages. + - .. lco:: PRECISION :choices: YES, NO :default: YES diff --git a/doc/source/drivers/vector/pgdump.rst b/doc/source/drivers/vector/pgdump.rst index 7fd0ad96be28..4a689aca4649 100644 --- a/doc/source/drivers/vector/pgdump.rst +++ b/doc/source/drivers/vector/pgdump.rst @@ -63,8 +63,19 @@ Layer Creation Options layer to have their field names "laundered" into a form more compatible with PostgreSQL. This converts to lower case and converts some special characters like "-" and "#" to "_". If "NO" exact names - are preserved. If enabled the table - (layer) name will also be laundered. + are preserved. If enabled the table (layer) name will also be laundered. + +- .. lco:: LAUNDER_ASCII + :choices: YES, NO + :default: NO + :since: 3.9 + + Implies LAUNDER=YES, with the extra substitution of UTF-8 accented + characters in the `Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement>`__ + and `Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__ + sets with the closest ASCII letter. Other non-ASCII characters are + replaced with underscore. + Consequently this option is not appropriate for non-Latin languages. - .. 
lco:: PRECISION :choices: YES, NO diff --git a/frmts/netcdf/netcdfdataset.cpp b/frmts/netcdf/netcdfdataset.cpp index b170b0e65823..68cf0c28cad4 100644 --- a/frmts/netcdf/netcdfdataset.cpp +++ b/frmts/netcdf/netcdfdataset.cpp @@ -3425,14 +3425,14 @@ void netCDFDataset::SetProjectionFromVar( for (unsigned int i = 0; i < strlen(poDS->papszDimName[poDS->nXDimID]) && i < 3; i++) { - szDimNameX[i] = (char)tolower(static_cast( + szDimNameX[i] = (char)CPLTolower(static_cast( (poDS->papszDimName[poDS->nXDimID])[i])); } szDimNameX[3] = '\0'; // for( unsigned int i = 0; // (i < strlen(poDS->papszDimName[poDS->nYDimID]) // && i < 3 ); i++ ) { - // szDimNameY[i]=(char)tolower(static_cast((poDS->papszDimName[poDS->nYDimID])[i])); + // szDimNameY[i]=(char)CPLTolower(static_cast((poDS->papszDimName[poDS->nYDimID])[i])); // } // szDimNameY[3] = '\0'; } diff --git a/frmts/pcidsk/sdk/core/pcidsk_utils.cpp b/frmts/pcidsk/sdk/core/pcidsk_utils.cpp index 169c3891ff48..45c1dbd7f256 100644 --- a/frmts/pcidsk/sdk/core/pcidsk_utils.cpp +++ b/frmts/pcidsk/sdk/core/pcidsk_utils.cpp @@ -40,6 +40,12 @@ #include #include +extern "C" +{ +int CPL_DLL CPLToupper(int c); +int CPL_DLL CPLTolower(int c); +} + #if !defined(va_copy) && defined(__va_copy) #define va_copy __va_copy #endif @@ -104,8 +110,7 @@ std::string &PCIDSK::UCaseStr( std::string &target ) { for( unsigned int i = 0; i < target.size(); i++ ) { - if( islower(static_cast(target[i])) ) - target[i] = (char) toupper(static_cast(target[i])); + target[i] = (char) CPLToupper(static_cast(target[i])); } return target; @@ -409,10 +414,8 @@ int PCIDSK::pci_strcasecmp( const char *string1, const char *string2 ) char c1 = string1[i]; char c2 = string2[i]; - if( islower(static_cast(c1)) ) - c1 = (char) toupper(static_cast(c1)); - if( islower(static_cast(c2)) ) - c2 = (char) toupper(static_cast(c2)); + c1 = (char) CPLToupper(static_cast(c1)); + c2 = (char) CPLToupper(static_cast(c2)); if( c1 < c2 ) return -1; @@ -447,10 +450,8 @@ int 
PCIDSK::pci_strncasecmp( const char *string1, const char *string2, size_t le char c1 = string1[i]; char c2 = string2[i]; - if( islower(static_cast(c1)) ) - c1 = (char) toupper(static_cast(c1)); - if( islower(static_cast(c2)) ) - c2 = (char) toupper(static_cast(c2)); + c1 = (char) CPLToupper(static_cast(c1)); + c2 = (char) CPLToupper(static_cast(c2)); if( c1 < c2 ) return -1; diff --git a/frmts/pds/pds4dataset.cpp b/frmts/pds/pds4dataset.cpp index dd532d3676f4..9862a3ea01bb 100644 --- a/frmts/pds/pds4dataset.cpp +++ b/frmts/pds/pds4dataset.cpp @@ -1421,7 +1421,7 @@ static CPLString FixupTableFilename(const CPLString &osFilename) if (!osExt.empty()) { CPLString osTry(osFilename); - if (islower(static_cast(osExt[0]))) + if (osExt[0] >= 'a' && osExt[0] <= 'z') { osTry = CPLResetExtension(osFilename, osExt.toupper()); } diff --git a/gcore/gdal_mdreader.cpp b/gcore/gdal_mdreader.cpp index 2d78e4de842c..18684e492c9e 100644 --- a/gcore/gdal_mdreader.cpp +++ b/gcore/gdal_mdreader.cpp @@ -1031,7 +1031,7 @@ static bool GDAL_IMD_AA2R(char ***ppapszIMD) CPLString osLine; osTarget.Printf( "IMAGE_1.%c%s", - tolower(static_cast(keylist[iKey][0])), + CPLTolower(static_cast(keylist[iKey][0])), keylist[iKey] + 1); osLine = osTarget + "=" + osValue; diff --git a/gcore/gdal_misc.cpp b/gcore/gdal_misc.cpp index 0e9feaad5da8..a00be33fda88 100644 --- a/gcore/gdal_misc.cpp +++ b/gcore/gdal_misc.cpp @@ -2316,9 +2316,9 @@ int GDALReadWorldFile2(const char *pszBaseFilename, const char *pszExtension, for (int i = 0; szExtUpper[i] != '\0'; i++) { szExtUpper[i] = static_cast( - toupper(static_cast(szExtUpper[i]))); + CPLToupper(static_cast(szExtUpper[i]))); szExtLower[i] = static_cast( - tolower(static_cast(szExtLower[i]))); + CPLTolower(static_cast(szExtLower[i]))); } const char *pszTFW = CPLResetExtension(pszBaseFilename, szExtLower); diff --git a/ogr/ogr_srs_pci.cpp b/ogr/ogr_srs_pci.cpp index 089720c2dd00..67f650a61a81 100644 --- a/ogr/ogr_srs_pci.cpp +++ b/ogr/ogr_srs_pci.cpp @@ -236,7 
+236,7 @@ OGRErr OGRSpatialReference::importFromPCI(const char *pszProj, if (nCode >= -99 && nCode <= 999) snprintf(szEarthModel, sizeof(szEarthModel), "%c%03d", - toupper(static_cast(*pszEM)), nCode); + CPLToupper(static_cast(*pszEM)), nCode); break; } diff --git a/ogr/ogrsf_frmts/avc/avc_binwr.cpp b/ogr/ogrsf_frmts/avc/avc_binwr.cpp index 2e7828179d32..5febe68bc65a 100644 --- a/ogr/ogrsf_frmts/avc/avc_binwr.cpp +++ b/ogr/ogrsf_frmts/avc/avc_binwr.cpp @@ -1793,7 +1793,8 @@ AVCBinFile *AVCBinWriteCreateTable(const char *pszInfoPath, for (i = 0; *pszPtr != '\0' && *pszPtr != '.' && *pszPtr != ' '; i++, pszPtr++) { - szCoverName[i] = (char)tolower(static_cast(*pszPtr)); + szCoverName[i] = + (char)CPLTolower(static_cast(*pszPtr)); } szCoverName[i] = '\0'; @@ -1802,13 +1803,14 @@ AVCBinFile *AVCBinWriteCreateTable(const char *pszInfoPath, for (i = 0; i < 3 && *pszPtr != '\0' && *pszPtr != ' '; i++, pszPtr++) { - szExt[i] = (char)tolower(static_cast(*pszPtr)); + szExt[i] = (char)CPLTolower(static_cast(*pszPtr)); } szExt[i] = '\0'; for (i = 0; *pszPtr != '\0' && *pszPtr != ' '; i++, pszPtr++) { - szSubclass[i] = (char)tolower(static_cast(*pszPtr)); + szSubclass[i] = + (char)CPLTolower(static_cast(*pszPtr)); } szSubclass[i] = '\0'; @@ -1980,7 +1982,7 @@ AVCBinFile *_AVCBinWriteCreateDBFTable(const char *pszPath, i++, pszDBFBasename++) { psFile->pszFilename[i] = - (char)tolower(static_cast(*pszDBFBasename)); + (char)CPLTolower(static_cast(*pszDBFBasename)); } strcat(psFile->pszFilename, ".dbf"); diff --git a/ogr/ogrsf_frmts/avc/avc_e00gen.cpp b/ogr/ogrsf_frmts/avc/avc_e00gen.cpp index d0b848c1dd48..2d1b49d92064 100644 --- a/ogr/ogrsf_frmts/avc/avc_e00gen.cpp +++ b/ogr/ogrsf_frmts/avc/avc_e00gen.cpp @@ -169,7 +169,7 @@ const char *AVCE00GenStartSection(AVCE00GenInfo *psInfo, AVCFileType eType, for (i = 0; pszClassName[i] != '\0'; i++) { psInfo->pszBuf[i] = - (char)toupper(static_cast(pszClassName[i])); + (char)CPLToupper(static_cast(pszClassName[i])); } psInfo->pszBuf[i] = 
'\0'; } diff --git a/ogr/ogrsf_frmts/avc/avc_e00read.cpp b/ogr/ogrsf_frmts/avc/avc_e00read.cpp index d397573412c8..255ac4ba05c6 100644 --- a/ogr/ogrsf_frmts/avc/avc_e00read.cpp +++ b/ogr/ogrsf_frmts/avc/avc_e00read.cpp @@ -1008,7 +1008,7 @@ static int _AVCE00ReadBuildSqueleton(AVCE00ReadPtr psInfo, char **papszCoverDir) CPLSPrintf("EXP 0 %s%s.E00", szCWD, osCoverPathTruncated.c_str())); pcTmp = pszEXPPath; for (; *pcTmp != '\0'; pcTmp++) - *pcTmp = (char)toupper(static_cast(*pcTmp)); + *pcTmp = (char)CPLToupper(static_cast(*pcTmp)); /*----------------------------------------------------------------- * EXP Header @@ -1309,7 +1309,8 @@ static int _AVCE00ReadBuildSqueleton(AVCE00ReadPtr psInfo, char **papszCoverDir) papszCoverDir[iFile]); pcTmp = (char *)szFname; for (; *pcTmp != '\0'; pcTmp++) - *pcTmp = (char)toupper(static_cast(*pcTmp)); + *pcTmp = + (char)CPLToupper(static_cast(*pcTmp)); papszCoverDir[iFile][nLen - 4] = '.'; papszTables = CSLAddString(papszTables, szFname); diff --git a/ogr/ogrsf_frmts/avc/avc_e00write.cpp b/ogr/ogrsf_frmts/avc/avc_e00write.cpp index 789feddfa9fb..67f5578cc3a4 100644 --- a/ogr/ogrsf_frmts/avc/avc_e00write.cpp +++ b/ogr/ogrsf_frmts/avc/avc_e00write.cpp @@ -480,7 +480,8 @@ static void _AVCE00WriteRenameTable(AVCTableDef *psTableDef, snprintf(szNewName, sizeof(szNewName), "%s", pszNewCoverName); for (i = 0; szNewName[i] != '\0'; i++) - szNewName[i] = (char)toupper(static_cast(szNewName[i])); + szNewName[i] = + (char)CPLToupper(static_cast(szNewName[i])); /*----------------------------------------------------------------- * Extract components from the current table name. 
@@ -662,7 +663,7 @@ static int _AVCE00WriteCreateCoverFile(AVCE00WritePtr psInfo, AVCFileType eType, * Make sure filename is all lowercase and attempt to create the file *----------------------------------------------------------------*/ for (i = 0; szFname[i] != '\0'; i++) - szFname[i] = (char)tolower(static_cast(szFname[i])); + szFname[i] = (char)CPLTolower(static_cast(szFname[i])); if (nStatus == 0) { @@ -961,8 +962,8 @@ int AVCE00DeleteCoverage(const char *pszCoverToDelete) { /* Convert table filename to lowercases */ for (j = 0; papszFiles[i] && papszFiles[i][j] != '\0'; j++) - papszFiles[i][j] = - (char)tolower(static_cast(papszFiles[i][j])); + papszFiles[i][j] = (char)CPLTolower( + static_cast(papszFiles[i][j])); /* Delete the .DAT file */ pszFname = CPLSPrintf("%s%s.dat", pszInfoPath, papszFiles[i]); diff --git a/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp b/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp index 3b8ca2da213f..a50ec0d778ac 100644 --- a/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp +++ b/ogr/ogrsf_frmts/carto/ogrcartodatasource.cpp @@ -458,7 +458,7 @@ OGRCARTODataSource::ICreateLayer(const char *pszNameIn, CPLString osName(pszNameIn); if (CPLFetchBool(papszOptions, "LAUNDER", true)) { - char *pszTmp = OGRPGCommonLaunderName(pszNameIn); + char *pszTmp = OGRPGCommonLaunderName(pszNameIn, "CARTO", false); osName = pszTmp; CPLFree(pszTmp); } diff --git a/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp b/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp index 70d4275edf80..695632dcd17c 100644 --- a/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp +++ b/ogr/ogrsf_frmts/carto/ogrcartotablelayer.cpp @@ -671,7 +671,7 @@ OGRCARTOTableLayer::CreateGeomField(const OGRGeomFieldDefn *poGeomFieldIn, if (bLaunderColumnNames) { char *pszSafeName = - OGRPGCommonLaunderName(poGeomField->GetNameRef(), "PG"); + OGRPGCommonLaunderName(poGeomField->GetNameRef(), "CARTO", false); poGeomField->SetName(pszSafeName); CPLFree(pszSafeName); } @@ -735,7 +735,8 @@ OGRErr 
OGRCARTOTableLayer::CreateField(const OGRFieldDefn *poFieldIn, OGRFieldDefn oField(poFieldIn); if (bLaunderColumnNames) { - char *pszName = OGRPGCommonLaunderName(oField.GetNameRef()); + char *pszName = + OGRPGCommonLaunderName(oField.GetNameRef(), "CARTO", false); oField.SetName(pszName); CPLFree(pszName); } diff --git a/ogr/ogrsf_frmts/dxf/ogr_autocad_services.cpp b/ogr/ogrsf_frmts/dxf/ogr_autocad_services.cpp index cf19f856544e..7e37fbf2b9ec 100644 --- a/ogr/ogrsf_frmts/dxf/ogr_autocad_services.cpp +++ b/ogr/ogrsf_frmts/dxf/ogr_autocad_services.cpp @@ -67,7 +67,7 @@ CPLString ACTextUnescape(const char *pszRawInput, const char *pszEncoding, if (pszInput[1] == ' ') osResult += '^'; else - osResult += static_cast(toupper( + osResult += static_cast(CPLToupper( static_cast(pszInput[1]))) ^ 0x40; pszInput++; diff --git a/ogr/ogrsf_frmts/edigeo/ogredigeodatasource.cpp b/ogr/ogrsf_frmts/edigeo/ogredigeodatasource.cpp index aeda2eb0fd9a..d966bd15f764 100644 --- a/ogr/ogrsf_frmts/edigeo/ogredigeodatasource.cpp +++ b/ogr/ogrsf_frmts/edigeo/ogredigeodatasource.cpp @@ -188,10 +188,8 @@ VSILFILE *OGREDIGEODataSource::OpenFile(const char *pszType, VSILFILE *fp = VSIFOpenL(osFilename, "rb"); if (fp == nullptr) { - CPLString osExtLower = osExt; - for (int i = 0; i < (int)osExt.size(); i++) - osExtLower[i] = (char)tolower(static_cast(osExt[i])); - CPLString osFilename2 = CPLFormCIFilename( + const CPLString osExtLower = CPLString(osExt).tolower(); + const CPLString osFilename2 = CPLFormCIFilename( CPLGetPath(pszName), osTmp.c_str(), osExtLower.c_str()); fp = VSIFOpenL(osFilename2, "rb"); if (fp == nullptr) diff --git a/ogr/ogrsf_frmts/elastic/ogrelasticlayer.cpp b/ogr/ogrsf_frmts/elastic/ogrelasticlayer.cpp index bb60c85390dc..ebfbdeea7799 100644 --- a/ogr/ogrsf_frmts/elastic/ogrelasticlayer.cpp +++ b/ogr/ogrsf_frmts/elastic/ogrelasticlayer.cpp @@ -1415,7 +1415,8 @@ static void decode_geohash_bbox(const char *geohash, double lat[2], hashlen = static_cast(strlen(geohash)); for (i 
= 0; i < hashlen; i++) { - c = static_cast(tolower(static_cast(geohash[i]))); + c = static_cast( + CPLTolower(static_cast(geohash[i]))); cd = static_cast(strchr(BASE32, c) - BASE32); for (j = 0; j < 5; j++) { diff --git a/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp b/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp index 62c52eaba5bd..4635659c873c 100644 --- a/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp +++ b/ogr/ogrsf_frmts/gmlas/ogrgmlaslayer.cpp @@ -1262,7 +1262,8 @@ CPLString OGRGMLASLayer::LaunderFieldName(const CPLString &osFieldName) if (m_poDS->GetConf().m_bPGIdentifierLaundering) { - char *pszLaundered = OGRPGCommonLaunderName(osLaunderedName, "GMLAS"); + char *pszLaundered = + OGRPGCommonLaunderName(osLaunderedName, "GMLAS", false); osLaunderedName = pszLaundered; CPLFree(pszLaundered); } diff --git a/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp b/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp index cb7bd6c7aba6..ad5f3b6d4864 100644 --- a/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp +++ b/ogr/ogrsf_frmts/gmlas/ogrgmlasschemaanalyzer.cpp @@ -549,7 +549,7 @@ bool GMLASSchemaAnalyzer::LaunderFieldNames(GMLASFeatureClass &oClass) for (size_t i = 0; i < aoFields.size(); i++) { char *pszLaundered = - OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS"); + OGRPGCommonLaunderName(aoFields[i].GetName(), "GMLAS", false); aoFields[i].SetName(pszLaundered); CPLFree(pszLaundered); } @@ -640,7 +640,7 @@ void GMLASSchemaAnalyzer::LaunderClassNames() for (size_t i = 0; i < aoClasses.size(); i++) { char *pszLaundered = - OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS"); + OGRPGCommonLaunderName(aoClasses[i]->GetName(), "GMLAS", false); aoClasses[i]->SetName(pszLaundered); CPLFree(pszLaundered); } diff --git a/ogr/ogrsf_frmts/gmlas/ogrgmlasutils.cpp b/ogr/ogrsf_frmts/gmlas/ogrgmlasutils.cpp index 82f0de613598..910b98501417 100644 --- a/ogr/ogrsf_frmts/gmlas/ogrgmlasutils.cpp +++ b/ogr/ogrsf_frmts/gmlas/ogrgmlasutils.cpp @@ -52,8 +52,7 @@ CPLString 
OGRGMLASTruncateIdentifier(const CPLString &osName, const char *pszToken = papszTokens[j]; bool bIsCamelCase = false; // Split parts like camelCase or CamelCase into several tokens - if (pszToken[0] != '\0' && - islower(static_cast(pszToken[1]))) + if (pszToken[0] != '\0' && pszToken[1] >= 'a' && pszToken[1] <= 'z') { bIsCamelCase = true; bool bLastIsLower = true; @@ -63,7 +62,7 @@ CPLString OGRGMLASTruncateIdentifier(const CPLString &osName, osCurrentPart += pszToken[1]; for (int k = 2; pszToken[k]; ++k) { - if (isupper(static_cast(pszToken[k]))) + if (pszToken[k] >= 'A' && pszToken[k] <= 'Z') { if (!bLastIsLower) { diff --git a/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h b/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h index 031cabcf565d..eeb300ce8d69 100644 --- a/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h +++ b/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h @@ -374,6 +374,8 @@ class GDALGeoPackageDataset final : public OGRSQLiteBaseDataSource, return nSoftTransactionLevel > 0; } + static std::string LaunderName(const std::string &osStr); + // At least 100000 to avoid conflicting with EPSG codes static constexpr int FIRST_CUSTOM_SRSID = 100000; @@ -707,6 +709,7 @@ class OGRGeoPackageTableLayer final : public OGRGeoPackageLayer bool m_bTruncateFields = false; bool m_bDeferredCreation = false; bool m_bTableCreatedInTransaction = false; + bool m_bLaunder = false; int m_iFIDAsRegularColumnIndex = -1; std::string m_osInsertionBuffer{}; // used by FeatureBindParameters to // store datetime values @@ -977,6 +980,11 @@ class OGRGeoPackageTableLayer final : public OGRGeoPackageLayer m_bTruncateFields = CPL_TO_BOOL(bFlag); } + void SetLaunder(bool bFlag) + { + m_bLaunder = bFlag; + } + OGRErr RunDeferredCreationIfNecessary(); bool RunDeferredDropRTreeTableIfNecessary(); bool DoJobAtTransactionCommit(); diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp index 9701755e14de..5d478837fc10 100644 --- 
a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp @@ -6660,6 +6660,72 @@ OGRLayer *GDALGeoPackageDataset::GetLayer(int iLayer) return m_papoLayers[iLayer]; } +/************************************************************************/ +/* LaunderName() */ +/************************************************************************/ + +/** Launder identifiers (table, column names) according to guidance at + * https://www.geopackage.org/guidance/getting-started.html: + * "For maximum interoperability, start your database identifiers (table names, + * column names, etc.) with a lowercase character and only use lowercase + * characters, numbers 0-9, and underscores (_)." + */ + +/* static */ +std::string GDALGeoPackageDataset::LaunderName(const std::string &osStr) +{ + char *pszASCII = CPLUTF8ForceToASCII(osStr.c_str(), '_'); + const std::string osStrASCII(pszASCII); + CPLFree(pszASCII); + + std::string osRet; + osRet.reserve(osStrASCII.size()); + + for (size_t i = 0; i < osStrASCII.size(); ++i) + { + if (osRet.empty()) + { + if (osStrASCII[i] >= 'A' && osStrASCII[i] <= 'Z') + { + osRet += (osStrASCII[i] - 'A' + 'a'); + } + else if (osStrASCII[i] >= 'a' && osStrASCII[i] <= 'z') + { + osRet += osStrASCII[i]; + } + else + { + continue; + } + } + else if (osStrASCII[i] >= 'A' && osStrASCII[i] <= 'Z') + { + osRet += (osStrASCII[i] - 'A' + 'a'); + } + else if ((osStrASCII[i] >= 'a' && osStrASCII[i] <= 'z') || + (osStrASCII[i] >= '0' && osStrASCII[i] <= '9') || + osStrASCII[i] == '_') + { + osRet += osStrASCII[i]; + } + else + { + osRet += '_'; + } + } + + if (osRet.empty() && !osStrASCII.empty()) + return LaunderName(std::string("x").append(osStrASCII)); + + if (osRet != osStr) + { + CPLDebug("GPKG", "LaunderName('%s') -> '%s'", osStr.c_str(), + osRet.c_str()); + } + + return osRet; +} + /************************************************************************/ /* ICreateLayer() */ 
/************************************************************************/ @@ -6682,6 +6748,11 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, return nullptr; } + const bool bLaunder = + CPLTestBool(CSLFetchNameValueDef(papszOptions, "LAUNDER", "NO")); + const std::string osTableName(bLaunder ? LaunderName(pszLayerName) + : std::string(pszLayerName)); + const auto eGType = poSrcGeomFieldDefn ? poSrcGeomFieldDefn->GetType() : wkbNone; const auto poSpatialRef = @@ -6710,7 +6781,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, pszOtherIdentifier = m_papoLayers[i]->GetName(); if (pszOtherIdentifier != nullptr && EQUAL(pszOtherIdentifier, pszIdentifier) && - !EQUAL(m_papoLayers[i]->GetName(), pszLayerName)) + !EQUAL(m_papoLayers[i]->GetName(), osTableName.c_str())) { CPLError(CE_Failure, CPLE_AppDefined, "Identifier %s is already used by table %s", @@ -6729,7 +6800,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, sqlite3_free(pszSQL); if (oResult && oResult->RowCount() > 0 && oResult->GetValue(0, 0) != nullptr && - !EQUAL(oResult->GetValue(0, 0), pszLayerName)) + !EQUAL(oResult->GetValue(0, 0), osTableName.c_str())) { CPLError(CE_Failure, CPLE_AppDefined, "Identifier %s is already used by table %s", pszIdentifier, @@ -6772,7 +6843,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, /* Avoiding gpkg prefixes is not an official requirement, but seems wise */ - if (STARTS_WITH(pszLayerName, "gpkg")) + if (STARTS_WITH(osTableName.c_str(), "gpkg")) { CPLError(CE_Failure, CPLE_AppDefined, "The layer name may not begin with 'gpkg' as it is a " @@ -6782,7 +6853,8 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, /* Preemptively try and avoid sqlite3 syntax errors due to */ /* illegal characters. 
*/ - if (strspn(pszLayerName, "`~!@#$%^&*()+-={}|[]\\:\";'<>?,./") > 0) + if (strspn(osTableName.c_str(), "`~!@#$%^&*()+-={}|[]\\:\";'<>?,./") > + 0) { CPLError( CE_Failure, CPLE_AppDefined, @@ -6794,7 +6866,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, /* Check for any existing layers that already use this name */ for (int iLayer = 0; iLayer < m_nLayers; iLayer++) { - if (EQUAL(pszLayerName, m_papoLayers[iLayer]->GetName())) + if (EQUAL(osTableName.c_str(), m_papoLayers[iLayer]->GetName())) { const char *pszOverwrite = CSLFetchNameValue(papszOptions, "OVERWRITE"); @@ -6808,7 +6880,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, "Layer %s already exists, CreateLayer failed.\n" "Use the layer creation option OVERWRITE=YES to " "replace it.", - pszLayerName); + osTableName.c_str()); return nullptr; } } @@ -6824,7 +6896,7 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, /* Create a blank layer. */ auto poLayer = std::unique_ptr( - new OGRGeoPackageTableLayer(this, pszLayerName)); + new OGRGeoPackageTableLayer(this, osTableName.c_str())); OGRSpatialReference *poSRS = nullptr; if (poSpatialRef) @@ -6833,7 +6905,9 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER); } poLayer->SetCreationParameters( - eGType, pszGeomColumnName, bGeomNullable, poSRS, + eGType, + bLaunder ? LaunderName(pszGeomColumnName).c_str() : pszGeomColumnName, + bGeomNullable, poSRS, CSLFetchNameValue(papszOptions, "SRID"), poSrcGeomFieldDefn ? poSrcGeomFieldDefn->GetCoordinatePrecision() : OGRGeomCoordinatePrecision(), @@ -6841,13 +6915,15 @@ GDALGeoPackageDataset::ICreateLayer(const char *pszLayerName, CSLFetchNameValueDef(papszOptions, "DISCARD_COORD_LSB", "NO")), CPLTestBool(CSLFetchNameValueDef( papszOptions, "UNDO_DISCARD_COORD_LSB_ON_READING", "NO")), - pszFIDColumnName, pszIdentifier, - CSLFetchNameValue(papszOptions, "DESCRIPTION")); + bLaunder ? 
LaunderName(pszFIDColumnName).c_str() : pszFIDColumnName, + pszIdentifier, CSLFetchNameValue(papszOptions, "DESCRIPTION")); if (poSRS) { poSRS->Release(); } + poLayer->SetLaunder(bLaunder); + /* Should we create a spatial index ? */ const char *pszSI = CSLFetchNameValue(papszOptions, "SPATIAL_INDEX"); int bCreateSpatialIndex = (pszSI == nullptr || CPLTestBool(pszSI)); diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedriver.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedriver.cpp index 5ff549b492ac..a6111fd8dd6b 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedriver.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedriver.cpp @@ -670,6 +670,8 @@ void RegisterOGRGeoPackage() poDriver->SetMetadataItem( GDAL_DS_LAYER_CREATIONOPTIONLIST, "" + "