Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter added to Admin-API GET /rooms #17276

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/17276.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Filter for public and empty rooms added to Admin-API [List Room API](https://element-hq.github.io/synapse/latest/admin_api/rooms.html#list-room-api).
4 changes: 4 additions & 0 deletions docs/admin_api/rooms.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ The following query parameters are available:
- the room's name,
- the local part of the room's canonical alias, or
- the complete (local and server part) room's id (case sensitive).
* `public_rooms` - Optional flag to filter public rooms. If `true`, only public rooms are queried. If `false`, public rooms are excluded from
the query. When the flag is absent (the default), **both** public and non-public rooms are included in the search results.
* `empty_rooms` - Optional flag to filter empty rooms. A room is empty if joined_members is zero. If `true`, only empty rooms are queried. If `false`, empty rooms are excluded from
the query. When the flag is absent (the default), **both** empty and non-empty rooms are included in the search results.

Defaults to no filtering.

Expand Down
13 changes: 12 additions & 1 deletion synapse/rest/admin/rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
ResolveRoomIdMixin,
RestServlet,
assert_params_in_dict,
parse_boolean,
parse_enum,
parse_integer,
parse_json,
Expand Down Expand Up @@ -242,13 +243,23 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
errcode=Codes.INVALID_PARAM,
)

public_rooms = parse_boolean(request, "public_rooms")
empty_rooms = parse_boolean(request, "empty_rooms")

direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
reverse_order = True if direction == Direction.BACKWARDS else False

# Return list of rooms according to parameters
rooms, total_rooms = await self.store.get_rooms_paginate(
start, limit, order_by, reverse_order, search_term
start,
limit,
order_by,
reverse_order,
search_term,
public_rooms,
empty_rooms,
)

response = {
# next_token should be opaque, so return a value the client can parse
"offset": start,
Expand Down
51 changes: 37 additions & 14 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,8 @@ async def get_rooms_paginate(
order_by: str,
reverse_order: bool,
search_term: Optional[str],
public_rooms: Optional[bool],
empty_rooms: Optional[bool],
) -> Tuple[List[Dict[str, Any]], int]:
"""Function to retrieve a paginated list of rooms as json.

Expand All @@ -617,30 +619,49 @@ async def get_rooms_paginate(
search_term: a string to filter room names,
canonical alias and room ids by.
Room ID must match exactly. Canonical alias must match a substring of the local part.
public_rooms: Optional flag to filter public and non-public rooms. If true, public rooms are queried.
if false, public rooms are excluded from the query. When it is
none (the default), both public rooms and none-public-rooms are queried.
empty_rooms: Optional flag to filter empty and non-empty rooms.
A room is empty if joined_members is zero.
If true, empty rooms are queried.
if false, empty rooms are excluded from the query. When it is
none (the default), both empty rooms and none-empty rooms are queried.
Returns:
A list of room dicts and an integer representing the total number of
rooms that exist given this query
"""
# Filter room names by a string
where_statement = ""
search_pattern: List[object] = []
filter_ = []
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
where_args = []
if search_term:
where_statement = """
WHERE LOWER(state.name) LIKE ?
OR LOWER(state.canonical_alias) LIKE ?
OR state.room_id = ?
"""
filter_ = [
"LOWER(state.name) LIKE ? OR "
"LOWER(state.canonical_alias) LIKE ? OR "
"state.room_id = ?"
]
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved

# Our postgres db driver converts ? -> %s in SQL strings as that's the
# placeholder for postgres.
# HOWEVER, if you put a % into your SQL then everything goes wibbly.
# To get around this, we're going to surround search_term with %'s
# before giving it to the database in python instead
search_pattern = [
"%" + search_term.lower() + "%",
"#%" + search_term.lower() + "%:%",
where_args = [
f"%{search_term.lower()}%",
f"#%{search_term.lower()}%:%",
search_term,
]
if public_rooms is not None:
filter_arg = "1" if public_rooms else "0"
filter_.append(f"rooms.is_public = '{filter_arg}'")

if empty_rooms is not None:
if empty_rooms:
filter_.append("curr.joined_members = 0")
else:
filter_.append("curr.joined_members <> 0")

where_clause = "WHERE " + " AND ".join(filter_) if len(filter_) > 0 else ""

# Set ordering
if RoomSortOrder(order_by) == RoomSortOrder.SIZE:
Expand Down Expand Up @@ -717,7 +738,7 @@ async def get_rooms_paginate(
LIMIT ?
OFFSET ?
""".format(
where=where_statement,
where=where_clause,
order_by=order_by_column,
direction="ASC" if order_by_asc else "DESC",
)
Expand All @@ -726,18 +747,20 @@ async def get_rooms_paginate(
count_sql = """
SELECT count(*) FROM (
SELECT room_id FROM room_stats_state state
INNER JOIN room_stats_current curr USING (room_id)
INNER JOIN rooms USING (room_id)
Comment on lines +750 to +751
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to estimate whether two more joins will have a significant impact on performance?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can EXPLAIN ANALYZE the query on a sizeable Matrix deployment, such as matrix.org.

Running the following query:

EXPLAIN ANALYZE SELECT count(*) FROM (
    SELECT room_id FROM room_stats_state state
    INNER JOIN room_stats_current curr USING (room_id)
    INNER JOIN rooms USING (room_id)
    WHERE LOWER(state.name) LIKE '%hello%'
        OR LOWER(state.canonical_alias) LIKE '%hello%'
        OR state.room_id = '!xyz:domain.com'
) AS get_room_ids;

which gives us:



                                                                                    QUERY PLAN                              
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=248961.11..248961.12 rows=1 width=8) (actual time=708.339..714.750 rows=1 loops=1)
   ->  Gather  (cost=248960.90..248961.11 rows=2 width=8) (actual time=708.028..714.726 rows=3 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         ->  Partial Aggregate  (cost=247960.90..247960.91 rows=1 width=8) (actual time=687.989..687.991 rows=1 loops=3)
               ->  Nested Loop  (cost=1.12..247930.10 rows=12320 width=0) (actual time=8.294..687.832 rows=658 loops=3)
                     Join Filter: (state.room_id = curr.room_id)
                     ->  Nested Loop  (cost=0.56..240275.86 rows=12320 width=62) (actual time=8.262..682.422 rows=658 loops=3)
                           ->  Parallel Seq Scan on room_stats_state state  (cost=0.00..212549.39 rows=12320 width=31) (actual time=8.210..676.798 rows=658 loops=3)
                                 Filter: ((lower(name) ~~ '%hello%'::text) OR (lower(canonical_alias) ~~ '%hello%'::text) OR (room_id = '!xyz:domain.com'::text))
                                 Rows Removed by Filter: 3097391
                           ->  Index Only Scan using rooms_pkey on rooms  (cost=0.56..2.25 rows=1 width=31) (actual time=0.008..0.008 rows=1 loops=1975)
                                 Index Cond: (room_id = state.room_id)
                                 Heap Fetches: 212
                     ->  Index Only Scan using room_stats_current_pkey on room_stats_current curr  (cost=0.56..0.61 rows=1 width=31) (actual time=0.007..0.007 rows=1 loops=1975)
                           Index Cond: (room_id = rooms.room_id)
                           Heap Fetches: 1144
 Planning Time: 0.830 ms
 JIT:
   Functions: 38
   Options: Inlining false, Optimization false, Expressions true, Deforming true
   Timing: Generation 2.227 ms, Inlining 0.000 ms, Optimization 0.961 ms, Emission 22.621 ms, Total 25.808 ms
 Execution Time: 715.623 ms
(23 rows)

(Note that I only analyzed after ensuring that a bare EXPLAIN would not be too costly.)

This is indeed fairly slow, and takes up a connection for nearly a whole second. As these filters are only using data from room_stats_state_current, do we really need the second INNER JOIN rooms USING (room_id)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I based the implementation on existing code (e.g. get_users_paginate_txn) The second join is used to determine the number of public or non-public rooms in the case of filtering. Perhaps it is also possible to determine the state of the flag via the events, but I would assume that this is not faster.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. In that case rooms is required. I've created a separate issue to track the slow performance of these queries: #17330

Otherwise this codepath is only hit for the /rooms admin API. Thus I think we can live with a slower, but more useful query.

{where}
) AS get_room_ids
""".format(
where=where_statement,
where=where_clause,
)

def _get_rooms_paginate_txn(
txn: LoggingTransaction,
) -> Tuple[List[Dict[str, Any]], int]:
# Add the search term into the WHERE clause
# and execute the data query
txn.execute(info_sql, search_pattern + [limit, start])
txn.execute(info_sql, where_args + [limit, start])

# Refactor room query data into a structured dictionary
rooms = []
Expand Down Expand Up @@ -767,7 +790,7 @@ def _get_rooms_paginate_txn(
# Execute the count query

# Add the search term into the WHERE clause if present
txn.execute(count_sql, search_pattern)
txn.execute(count_sql, where_args)

room_count = cast(Tuple[int], txn.fetchone())
return rooms, room_count[0]
Expand Down
77 changes: 77 additions & 0 deletions tests/rest/admin/test_room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1795,6 +1795,83 @@ def test_search_term_non_ascii(self) -> None:
self.assertEqual(room_id, channel.json_body["rooms"][0].get("room_id"))
self.assertEqual("ж", channel.json_body["rooms"][0].get("name"))

def test_filter_public_rooms(self) -> None:
self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=True
)
self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=True
)
self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=False
)

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(3, response.json_body["total_rooms"])
self.assertEqual(3, len(response.json_body["rooms"]))

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms?public_rooms=true",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(2, response.json_body["total_rooms"])
self.assertEqual(2, len(response.json_body["rooms"]))

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms?public_rooms=false",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(1, response.json_body["total_rooms"])
self.assertEqual(1, len(response.json_body["rooms"]))

def test_filter_empty_rooms(self) -> None:
self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=True
)
self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=True
)
room_id = self.helper.create_room_as(
self.admin_user, tok=self.admin_user_tok, is_public=False
)
self.helper.leave(room_id, self.admin_user, tok=self.admin_user_tok)

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(3, response.json_body["total_rooms"])
self.assertEqual(3, len(response.json_body["rooms"]))

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms?empty_rooms=false",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(2, response.json_body["total_rooms"])
self.assertEqual(2, len(response.json_body["rooms"]))

response = self.make_request(
"GET",
"/_synapse/admin/v1/rooms?empty_rooms=true",
access_token=self.admin_user_tok,
)
self.assertEqual(200, response.code, msg=response.json_body)
self.assertEqual(1, response.json_body["total_rooms"])
self.assertEqual(1, len(response.json_body["rooms"]))

def test_single_room(self) -> None:
"""Test that a single room can be requested correctly"""
# Create two test rooms
Expand Down
Loading