Skip to content

Commit

Permalink
Add some debug to keep track of client state desynchronisation
Browse files Browse the repository at this point in the history
  • Loading branch information
richvdh committed Feb 22, 2024
1 parent e200023 commit de613bb
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 0 deletions.
1 change: 1 addition & 0 deletions changelog.d/16952.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add some debug to keep track of client state desynchronisation.
120 changes: 120 additions & 0 deletions synapse/handlers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@

logger = logging.getLogger(__name__)

# Logging for https://github.com/matrix-org/matrix-spec/issues/1209 and
# https://github.com/element-hq/synapse/issues/16940
client_state_desync_logger = logging.getLogger("synapse.client_state_desync_debug")

# Counts the number of times we returned a non-empty sync. `type` is one of
# "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is
# "true" or "false" depending on if the request asked for lazy loaded members or
Expand Down Expand Up @@ -262,6 +266,17 @@ def __bool__(self) -> bool:
)


@attr.s(slots=True, auto_attribs=True)
class ClientCalculatedMembershipStateErrorEntry:
"""Difference between the membership states calculated by the client and the server
Returned by `SyncHandler._calculate_state_error`.
"""

actual: Optional[str]
calculated: Optional[str]


class SyncHandler:
def __init__(self, hs: "HomeServer"):
self.hs_config = hs.config
Expand Down Expand Up @@ -1204,6 +1219,20 @@ async def _compute_state_delta_for_full_sync(
previous_timeline_end={},
lazy_load_members=lazy_load_members,
)

if client_state_desync_logger.isEnabledFor(logging.WARNING):
current_state = await self._state_storage_controller.get_current_state_ids(
room_id, state_filter, await_full_state=await_full_state
)
await self._log_client_state_desync(
room_id,
{},
state_at_timeline_end,
state_ids,
timeline_state,
current_state,
)

return state_ids

async def _compute_state_delta_for_incremental_sync(
Expand Down Expand Up @@ -1304,6 +1333,19 @@ async def _compute_state_delta_for_incremental_sync(
lazy_load_members=lazy_load_members,
)

if client_state_desync_logger.isEnabledFor(logging.WARNING):
current_state = await self._state_storage_controller.get_current_state_ids(
room_id, state_filter, await_full_state=await_full_state
)
await self._log_client_state_desync(
room_id,
{},
state_at_timeline_end,
state_ids,
timeline_state,
current_state,
)

return state_ids

async def _find_missing_partial_state_memberships(
Expand Down Expand Up @@ -1420,6 +1462,84 @@ async def _find_missing_partial_state_memberships(

return additional_state_ids

async def _log_client_state_desync(
self,
room_id: str,
state_at_previous_timeline_end: StateMap[str],
state_at_timeline_end: StateMap[str],
sync_response_state_state: StateMap[str],
sync_response_timeline_state: StateMap[str],
current_state: StateMap[str],
) -> None:
# Tracking to see how often https://github.com/matrix-org/matrix-spec/issues/1209 bites
#
# Clients are supposed to be able to track the state of the room by applying all of
# the state updates they see to their in-memory state map. We are interested in
# knowing how often that algorithm fails.
client_calculated_state = dict(state_at_previous_timeline_end)
client_calculated_state.update(sync_response_state_state)
client_calculated_state.update(sync_response_timeline_state)

spec1209_error = await self._calculate_state_error(
state_at_timeline_end, client_calculated_state
)
if spec1209_error:
logger.warning(
"matrix-spec#1209 discrepancy in room %s: %s",
room_id,
spec1209_error,
)

# A similar calculation for https://github.com/element-hq/synapse/issues/16940
synapse_16940_error = await self._calculate_state_error(
current_state, client_calculated_state
)
if synapse_16940_error:
logger.warning(
"synapse#16940 discrepancy in room %s: %s",
room_id,
synapse_16940_error,
)

async def _calculate_state_error(
self, actual_state: StateMap[str], client_calculated_state: StateMap[str]
) -> Mapping[str, ClientCalculatedMembershipStateErrorEntry]:
error_map: Dict[str, ClientCalculatedMembershipStateErrorEntry] = {}

async def event_id_to_membership(event_id: Optional[str]) -> Optional[str]:
if event_id is None:
return None
event = await self.store.get_event(event_id)
return event.membership

# Check for entries in the calculated state which differ from the actual state
for (
event_type,
state_key,
), calculated_event_id in client_calculated_state.items():
if event_type != EventTypes.Member:
continue

actual_event_id = actual_state.get((event_type, state_key))
if calculated_event_id != actual_event_id:
error_map[state_key] = ClientCalculatedMembershipStateErrorEntry(
actual=await event_id_to_membership(actual_event_id),
calculated=await event_id_to_membership(calculated_event_id),
)

# Check for entries which are missing altogether.
for (event_type, state_key), actual_event_id in actual_state.items():
if event_type != EventTypes.Member:
continue

if state_key not in client_calculated_state:
error_map[state_key] = ClientCalculatedMembershipStateErrorEntry(
actual=await event_id_to_membership(actual_event_id),
calculated=None,
)

return error_map

async def unread_notifs_for_room_id(
self, room_id: str, sync_config: SyncConfig
) -> RoomNotifCounts:
Expand Down

0 comments on commit de613bb

Please sign in to comment.