From 0b6717c977e31c729717b41cc3bb44e5b310d24f Mon Sep 17 00:00:00 2001 From: devonh Date: Wed, 26 Jun 2024 15:12:12 +0000 Subject: [PATCH] Truncate large values inside of Firebase notification body fields. (#386) * Truncate large fields within gcm notification body * Add changelog entry * Fix terminology * Add ellipsis add end of truncated content fields * Add ellipsis after full max bytes since it's an arbitrary size anyway * Add warning log when firebase notification contains too many large fields * Use proper ellipsis Co-authored-by: Erik Johnston * Fix new ellipsis tests * Add unicode support when truncating large fields * Remove redundant bytes conversion during truncation * Add docs for truncate_str * Clarify func docs * Remove whitespace * Use Tuple instead of tuple in type signature --------- Co-authored-by: Erik Johnston --- changelog.d/386.bugfix | 1 + sygnal/gcmpushkin.py | 48 ++++++++++++++++++- tests/test_gcm.py | 102 +++++++++++++++++++++++++++++++++++++++++ tests/testutils.py | 94 +++++++++++++++++++++++++++++++++++++ 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 changelog.d/386.bugfix diff --git a/changelog.d/386.bugfix b/changelog.d/386.bugfix new file mode 100644 index 00000000..cecc0bbb --- /dev/null +++ b/changelog.d/386.bugfix @@ -0,0 +1 @@ +Truncate large values inside of Firebase notification content fields. diff --git a/sygnal/gcmpushkin.py b/sygnal/gcmpushkin.py index 8afbf8bb..0f83f1ff 100644 --- a/sygnal/gcmpushkin.py +++ b/sygnal/gcmpushkin.py @@ -85,6 +85,11 @@ RETRY_DELAY_BASE = 10 RETRY_DELAY_BASE_QUOTA_EXCEEDED = 60 MAX_BYTES_PER_FIELD = 1024 +MAX_FIREBASE_MESSAGE_SIZE = 4096 + +# Subtract 1 since the combined size of the other non-overflowing fields will push it over the +# edge otherwise. +MAX_NOTIFICATION_OVERFLOW_FIELDS = MAX_FIREBASE_MESSAGE_SIZE / MAX_BYTES_PER_FIELD - 1 AUTH_SCOPES = ["https://www.googleapis.com/auth/firebase.messaging"] @@ -676,6 +681,7 @@ def _build_data( JSON-compatible dict or None if the default_payload is misconfigured """ data = {} + overflow_fields = 0 if device.data: default_payload = device.data.get("default_payload", {}) @@ -702,14 +708,23 @@ def _build_data( data[attr] = getattr(n, attr) # Truncate fields to a sensible maximum length. If the whole # body is too long, GCM will reject it. - if data[attr] is not None and len(data[attr]) > MAX_BYTES_PER_FIELD: - data[attr] = data[attr][0:MAX_BYTES_PER_FIELD] + if data[attr] is not None and isinstance(data[attr], str): + # The only `attr` that shouldn't be of type `str` is `content`, + # which is handled explicitly later on. + data[attr], truncated = truncate_str( + data[attr], MAX_BYTES_PER_FIELD + ) + if truncated: + overflow_fields += 1 if api_version is APIVersion.V1: if isinstance(data.get("content"), dict): for attr, value in data["content"].items(): if not isinstance(value, str): continue + value, truncated = truncate_str(value, MAX_BYTES_PER_FIELD) + if truncated: + overflow_fields += 1 data["content_" + attr] = value del data["content"] @@ -725,4 +740,33 @@ def _build_data( data["unread"] = str(n.counts.unread) data["missed_calls"] = str(n.counts.missed_calls) + if overflow_fields > MAX_NOTIFICATION_OVERFLOW_FIELDS: + logger.warning( + "Payload contains too many overflowing fields. Notification likely to be rejected by Firebase." + ) + return data + + +def truncate_str(input: str, max_bytes: int) -> Tuple[str, bool]: + """ + Truncate the given string. If the truncation would occur in the middle of a unicode + character, that character will be removed entirely instead. + Appends a `…` character to the resulting string when truncation occurs. + + Args: + `input`: the string to be truncated + `max_bytes`: maximum length, in bytes, that the payload should occupy when truncated + + Returns: + Tuple of (truncated string, whether truncation took place) + """ + + str_bytes = input.encode("utf-8") + if len(str_bytes) <= max_bytes: + return (input, False) + + try: + return (str_bytes[: max_bytes - 3].decode("utf-8") + "…", True) + except UnicodeDecodeError as err: + return (str_bytes[: err.start].decode("utf-8") + "…", True) diff --git a/tests/test_gcm.py b/tests/test_gcm.py index cc63df56..86e23da3 100644 --- a/tests/test_gcm.py +++ b/tests/test_gcm.py @@ -546,3 +546,105 @@ def test_fcm_options(self) -> None: assert gcm.last_request_body is not None self.assertEqual(gcm.last_request_body["mutable_content"], True) self.assertEqual(gcm.last_request_body["content_available"], True) + + def test_api_v1_large_fields(self) -> None: + """ + Tests the gcm pushkin truncates unusually large fields. Includes large + fields both at the top level of `data`, and nested within `content`. + """ + self.apns_pushkin_snotif = MagicMock() + gcm = self.get_test_pushkin("com.example.gcm.apiv1") + gcm.preload_with_response( + 200, {"results": [{"message_id": "msg42", "registration_id": "spqr"}]} + ) + + # type safety: using ignore here due to mypy not handling monkeypatching, + # see https://github.com/python/mypy/issues/2427 + gcm._request_dispatch = self.apns_pushkin_snotif # type: ignore[assignment] # noqa: E501 + + method = self.apns_pushkin_snotif + method.side_effect = testutils.make_async_magic_mock(([], [])) + + resp = self._request( + self._make_dummy_notification_large_fields([DEVICE_EXAMPLE_APIV1]) + ) + + self.assertEqual(1, method.call_count) + notification_req = method.call_args.args + + # The values for `room_name` & `content_other` should be truncated from the original. + self.assertEqual( + { + "message": { + "data": { + "event_id": "$qTOWWTEL48yPm3uT-gdNhFcoHxfKbZuqRVnnWWSkGBs", + "type": "m.room.message", + "sender": "@exampleuser:matrix.org", + "room_name": "xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooox…", + "room_alias": "#exampleroom:matrix.org", + "membership": None, + "sender_display_name": "Major Tom", + "content_msgtype": "m.text", + "content_body": "I'm floating in a most peculiar way.", + "content_other": "xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxx🦉oooooo£xxxxxxxx☻oo🦉…", + "room_id": "!slw48wfj34rtnrf:example.com", + "prio": "high", + "unread": "2", + "missed_calls": "1", + }, + "android": { + "notification": { + "body": { + "test body", + }, + }, + "priority": "high", + }, + "apns": { + "payload": { + "aps": { + "content-available": 1, + "mutable-content": 1, + "alert": "", + }, + }, + }, + "token": "spqr", + } + }, + notification_req[2], + ) + + self.assertEqual(resp, {"rejected": []}) + assert notification_req[3] is not None + self.assertEqual( + notification_req[3].get("Authorization"), ["Bearer myaccesstoken"] + ) diff --git a/tests/testutils.py b/tests/testutils.py index 25898418..1a6112eb 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -136,6 +136,100 @@ def _make_dummy_notification_badge_only(self, devices): } } + # NOTE: The `⚑` character (len 3 bytes) is inserted at byte position 1020 (occupying 1020-1022). + # This will make the truncation (which is `str[: 1024 - 3]`) occur in the middle of a unicode + # character. The truncation logic should recognize this and return the string starting before + # the `⚑`, with a `…` appended to indicate the string was truncated. + def _make_dummy_notification_large_fields(self, devices): + return { + "notification": { + "id": "$3957tyerfgewrf384", + "room_id": "!slw48wfj34rtnrf:example.com", + "event_id": "$qTOWWTEL48yPm3uT-gdNhFcoHxfKbZuqRVnnWWSkGBs", + "type": "m.room.message", + "sender": "@exampleuser:matrix.org", + "sender_display_name": "Major Tom", + "room_name": "xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo", + "room_alias": "#exampleroom:matrix.org", + "prio": "high", + "content": { + "msgtype": "m.text", + "body": "I'm floating in a most peculiar way.", + "other": "xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxx🦉oooooo£xxxxxxxx☻oo🦉⚑xxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo\ +xxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxx\ +ooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxooooooooooxxxxxxxxxxoooooooooo", + }, + "counts": {"unread": 2, "missed_calls": 1}, + "devices": devices, + } + } + def _request(self, payload: Union[str, dict]) -> Union[dict, int]: """ Make a dummy request to the notify endpoint with the specified payload