Skip to content

Commit

Permalink
Add unicode support when truncating large fields
Browse files Browse the repository at this point in the history
  • Loading branch information
devonh committed Jun 24, 2024
1 parent 122a252 commit 11916b5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 8 deletions.
21 changes: 16 additions & 5 deletions sygnal/gcmpushkin.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,18 +705,21 @@ def _build_data(
data[attr] = getattr(n, attr)
# Truncate fields to a sensible maximum length. If the whole
# body is too long, GCM will reject it.
if data[attr] is not None and len(data[attr]) > MAX_BYTES_PER_FIELD:
overflow_fields += 1
data[attr] = data[attr][0:MAX_BYTES_PER_FIELD]
if data[attr] is not None and isinstance(data[attr], str):
# The only `attr` that shouldn't be of type `str` is `content`,
# which is handled explicitly later on.
if len(bytes(data[attr], "utf-8")) > MAX_BYTES_PER_FIELD:
overflow_fields += 1
data[attr] = truncate_str(data[attr], MAX_BYTES_PER_FIELD)

if api_version is APIVersion.V1:
if isinstance(data.get("content"), dict):
for attr, value in data["content"].items():
if not isinstance(value, str):
continue
if len(value) > MAX_BYTES_PER_FIELD:
if len(bytes(value, "utf-8")) > MAX_BYTES_PER_FIELD:
overflow_fields += 1
value = value[: MAX_BYTES_PER_FIELD - 3] + "…"
value = truncate_str(value, MAX_BYTES_PER_FIELD)
data["content_" + attr] = value
del data["content"]

Expand All @@ -738,3 +741,11 @@ def _build_data(
)

return data


def truncate_str(input: str, max_length: int) -> str:
    """Shorten ``input`` so its UTF-8 encoding fits in ``max_length`` bytes.

    The limit is measured in encoded bytes, not characters. When the text
    has to be cut, a trailing ``…`` (3 bytes in UTF-8) marks the truncation,
    and the cut never lands inside a multi-byte character.

    Args:
        input: The text to shorten. (The name shadows the builtin but is
            kept so keyword callers continue to work.)
        max_length: Maximum size of the result, in UTF-8 bytes.

    Returns:
        ``input`` unchanged if it already fits; otherwise the longest prefix
        that, together with the ``…`` marker, fits in ``max_length`` bytes.
        If ``max_length`` is too small to hold the marker at all, the prefix
        is returned without it so the byte limit is still honoured.
    """
    marker = "…"
    str_bytes = input.encode("utf-8")

    # Nothing to do when the encoded text is already within the limit.
    if len(str_bytes) <= max_length:
        return input

    budget = max_length - len(marker.encode("utf-8"))
    if budget < 0:
        # No room for the marker. A negative slice bound would count from
        # the END of the bytes and keep almost everything, so clamp instead
        # and drop the marker.
        marker = ""
        budget = max(max_length, 0)

    # The bytes are valid UTF-8, so the only possible decode error is a
    # multi-byte character split by the slice; "ignore" drops that partial
    # trailing sequence and nothing else.
    return str_bytes[:budget].decode("utf-8", errors="ignore") + marker
4 changes: 2 additions & 2 deletions tests/test_gcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ def test_api_v1_large_fields(self) -> None:
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxx",
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooox…",
"room_alias": "#exampleroom:matrix.org",
"membership": None,
"sender_display_name": "Major Tom",
Expand All @@ -548,7 +548,7 @@ def test_api_v1_large_fields(self) -> None:
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooox…",
ooooooooooxxxxxxxxxx🦉oooooo£xxxxxxxx☻oo🦉…",
"room_id": "!slw48wfj34rtnrf:example.com",
"prio": "high",
"unread": "2",
Expand Down
6 changes: 5 additions & 1 deletion tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def _make_dummy_notification_badge_only(self, devices):
}
}

# NOTE: The `⚑` character (3 bytes long in UTF-8) is inserted at byte position 1020 (occupying
# bytes 1020-1022). The truncation slices the UTF-8 encoded bytes at `[: 1024 - 3]`, so the cut
# lands in the middle of this multi-byte character. The truncation logic should recognize this
# and return the string up to, but not including, the `⚑`, with a `…` appended to indicate that
# the string was truncated.
def _make_dummy_notification_large_fields(self, devices):
return {
"notification": {
Expand Down Expand Up @@ -199,7 +203,7 @@ def _make_dummy_notification_large_fields(self, devices):
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\
ooooooooooxxxxxxxxxx🦉oooooo£xxxxxxxx☻oo🦉⚑xxxxxxxxxxoooooooooo\
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\
xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\
Expand Down

0 comments on commit 11916b5

Please sign in to comment.