Skip to content

Commit

Permalink
OSM: properly escape special characters with TAGS_FORMAT=JSON open op…
Browse files Browse the repository at this point in the history
…tion

Fixes #9673
  • Loading branch information
rouault committed Apr 16, 2024
1 parent 5fdc9b5 commit 3312a99
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 57 deletions.
7 changes: 7 additions & 0 deletions autotest/ogr/data/osm/test_json.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="hand">
<bounds minlat="49" minlon="2" maxlat="50" maxlon="3"/>
<node id="3" lat="49.5" lon="3" user="some_user" uid="1" version="1" changeset="1" timestamp="2012-07-10T00:00:00Z">
<tag k="foo" v="x'\&quot;&#9;&#10;&#13;y"/>
</node>
</osm>
Binary file added autotest/ogr/data/osm/test_json.pbf
Binary file not shown.
18 changes: 18 additions & 0 deletions autotest/ogr/ogr_osm.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,3 +911,21 @@ def test_ogr_osm_tags_json():
assert lyr_defn.GetFieldDefn(other_tags_idx).GetSubType() == ogr.OFSTJSON
f = lyr.GetNextFeature()
assert f["other_tags"] == '{"foo":"bar"}'


###############################################################################
# Test TAGS_FORMAT=JSON


def test_ogr_osm_tags_json_special_characters():

ds = gdal.OpenEx("data/osm/test_json.pbf", open_options=["TAGS_FORMAT=JSON"])

lyr = ds.GetLayerByName("points")
lyr_defn = lyr.GetLayerDefn()
other_tags_idx = lyr_defn.GetFieldIndex("other_tags")
assert other_tags_idx >= 0
assert lyr_defn.GetFieldDefn(other_tags_idx).GetType() == ogr.OFTString
assert lyr_defn.GetFieldDefn(other_tags_idx).GetSubType() == ogr.OFSTJSON
f = lyr.GetNextFeature()
assert f["other_tags"] == """{"foo":"x'\\\\\\"\\t\\n\\ry"}"""
3 changes: 1 addition & 2 deletions ogr/ogrsf_frmts/osm/ogr_osm.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,7 @@ class OGROSMLayer final : public OGRLayer

bool m_bHasWarnedTooManyFeatures = false;

char *m_pszAllTags = nullptr;
bool m_bHasWarnedAllTagsTruncated = false;
std::string m_osAllTagsBuffer{};

bool m_bUserInterested = true;

Expand Down
119 changes: 64 additions & 55 deletions ogr/ogrsf_frmts/osm/ogrosmlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@
constexpr int SWITCH_THRESHOLD = 10000;
constexpr int MAX_THRESHOLD = 100000;

constexpr int ALLTAGS_LENGTH = 8192;

/************************************************************************/
/* OGROSMLayer() */
/************************************************************************/
Expand All @@ -69,8 +67,7 @@ OGROSMLayer::OGROSMLayer(OGROSMDataSource *poDSIn, int nIdxLayerIn,
const char *pszName)
: m_poDS(poDSIn), m_nIdxLayer(nIdxLayerIn),
m_poFeatureDefn(new OGRFeatureDefn(pszName)),
m_poSRS(new OGRSpatialReference()),
m_pszAllTags(static_cast<char *>(CPLMalloc(ALLTAGS_LENGTH)))
m_poSRS(new OGRSpatialReference())
{
SetDescription(m_poFeatureDefn->GetName());
m_poFeatureDefn->Reference();
Expand Down Expand Up @@ -113,8 +110,6 @@ OGROSMLayer::~OGROSMLayer()
sqlite3_finalize(m_oComputedAttributes[i].hStmt);
}

CPLFree(m_pszAllTags);

CPLFree(m_papoFeatures);
}

Expand Down Expand Up @@ -520,25 +515,61 @@ int OGROSMLayer::AddInOtherOrAllTags(const char *pszK)
}

/************************************************************************/
/* OGROSMEscapeString() */
/* OGROSMEscapeStringHSTORE() */
/************************************************************************/

static int OGROSMEscapeString(const char *pszV, char *pszAllTags)
static void OGROSMEscapeStringHSTORE(const char *pszV, std::string &sOut)
{
int nAllTagsOff = 0;

pszAllTags[nAllTagsOff++] = '"';
sOut += '"';

for (int k = 0; pszV[k] != '\0'; k++)
{
if (pszV[k] == '"' || pszV[k] == '\\')
pszAllTags[nAllTagsOff++] = '\\';
pszAllTags[nAllTagsOff++] = pszV[k];
sOut += '\\';
sOut += pszV[k];
}

pszAllTags[nAllTagsOff++] = '"';
sOut += '"';
}

/************************************************************************/
/* OGROSMEscapeStringJSON() */
/************************************************************************/

static void OGROSMEscapeStringJSON(const char *pszV, std::string &sOut)
{
sOut += '"';

return nAllTagsOff;
for (int k = 0; pszV[k] != '\0'; k++)
{
const char ch = pszV[k];
switch (ch)
{
case '"':
sOut += "\\\"";
break;
case '\\':
sOut += "\\\\";
break;
case '\n':
sOut += "\\n";
break;
case '\r':
sOut += "\\r";
break;
case '\t':
sOut += "\\t";
break;
default:
if (static_cast<unsigned char>(ch) < ' ')
sOut += CPLSPrintf("\\u%04X", ch);
else
sOut += ch;
break;
}
}

sOut += '"';
}

/************************************************************************/
Expand Down Expand Up @@ -627,7 +658,7 @@ void OGROSMLayer::SetFieldsFromTags(OGRFeature *poFeature, GIntBig nID,
poFeature->SetField("osm_changeset", (int)psInfo->nChangeset);
}

int nAllTagsOff = 0;
m_osAllTagsBuffer.clear();
for (unsigned int j = 0; j < nTags; j++)
{
const char *pszK = pasTags[j].pszK;
Expand All @@ -643,48 +674,27 @@ void OGROSMLayer::SetFieldsFromTags(OGRFeature *poFeature, GIntBig nID,
{
if (AddInOtherOrAllTags(pszK))
{
const int nLenK = static_cast<int>(strlen(pszK));
const int nLenV = static_cast<int>(strlen(pszV));
const int nLenKEscaped = 1 + 2 * nLenK + 1;
const int nLenVEscaped = 1 + 2 * nLenV + 1;
// 3 is either for
// - HSTORE: ',' separator and '=>'
// - JSON: leading '{' or ',', ':' and closing '}'
if (nAllTagsOff + nLenKEscaped + nLenVEscaped + 3 >=
ALLTAGS_LENGTH - 1)
{
if (!m_bHasWarnedAllTagsTruncated)
CPLDebug("OSM",
"all_tags/other_tags field truncated for "
"feature " CPL_FRMT_GIB,
nID);
m_bHasWarnedAllTagsTruncated = true;
continue;
}

if (m_poDS->m_bTagsAsHSTORE)
{
if (nAllTagsOff)
m_pszAllTags[nAllTagsOff++] = ',';
if (!m_osAllTagsBuffer.empty())
m_osAllTagsBuffer += ',';

nAllTagsOff +=
OGROSMEscapeString(pszK, m_pszAllTags + nAllTagsOff);
OGROSMEscapeStringHSTORE(pszK, m_osAllTagsBuffer);

m_pszAllTags[nAllTagsOff++] = '=';
m_pszAllTags[nAllTagsOff++] = '>';
m_osAllTagsBuffer += '=';
m_osAllTagsBuffer += '>';

nAllTagsOff +=
OGROSMEscapeString(pszV, m_pszAllTags + nAllTagsOff);
OGROSMEscapeStringHSTORE(pszV, m_osAllTagsBuffer);
}
else
{
m_pszAllTags[nAllTagsOff] = nAllTagsOff ? ',' : '{';
nAllTagsOff++;
nAllTagsOff +=
OGROSMEscapeString(pszK, m_pszAllTags + nAllTagsOff);
m_pszAllTags[nAllTagsOff++] = ':';
nAllTagsOff +=
OGROSMEscapeString(pszV, m_pszAllTags + nAllTagsOff);
if (!m_osAllTagsBuffer.empty())
m_osAllTagsBuffer += ',';
else
m_osAllTagsBuffer = '{';
OGROSMEscapeStringJSON(pszK, m_osAllTagsBuffer);
m_osAllTagsBuffer += ':';
OGROSMEscapeStringJSON(pszV, m_osAllTagsBuffer);
}
}

Expand All @@ -698,17 +708,16 @@ void OGROSMLayer::SetFieldsFromTags(OGRFeature *poFeature, GIntBig nID,
}
}

if (nAllTagsOff)
if (!m_osAllTagsBuffer.empty())
{
if (!m_poDS->m_bTagsAsHSTORE)
{
m_pszAllTags[nAllTagsOff++] = '}';
m_osAllTagsBuffer += '}';
}
m_pszAllTags[nAllTagsOff] = '\0';
if (m_nIndexAllTags >= 0)
poFeature->SetField(m_nIndexAllTags, m_pszAllTags);
poFeature->SetField(m_nIndexAllTags, m_osAllTagsBuffer.c_str());
else
poFeature->SetField(m_nIndexOtherTags, m_pszAllTags);
poFeature->SetField(m_nIndexOtherTags, m_osAllTagsBuffer.c_str());
}

for (size_t i = 0; i < m_oComputedAttributes.size(); i++)
Expand Down

0 comments on commit 3312a99

Please sign in to comment.