From e95d7e7e3239c0fe84448e1ef5314b4169dd6f7e Mon Sep 17 00:00:00 2001 From: Jesper Hodge Date: Mon, 20 May 2024 14:37:58 -0400 Subject: [PATCH] fix: libraries performance problem This is an attempt to fix a performance problem on the libraries home page. When you go to studio home and click on the libraries tab, on prod it will be quick for admins but extremely slow for course instructors (> 12 seconds) and leads to timeouts. It grows with the number of libraries that are assigned to the instructor. The Python code for the request to load libraries for a particular user goes through all existing libraries and then checks all of the user's roles for each library, which results in a complexity of O(l*r), l=libraries, r=roles. This PR improves the complexity to O(l). The BulkRoleCache and RoleCache classes were using a python set to store all roles for a particular user. A user can have a large number of roles, and lookup speed of iterating through a set is slow (O(n)). Most roles don't have the same course id, however. So if you have the course id of the role you're looking for, we can use a dict of course ids that contain related roles. The number of roles per course id is negligible, so we arrive at a lookup speed of O(1) when looking up a user's roles that belong to a specific course id. The BulkRoleCache now caches and stores user roles in a data structure like this: { user_id_1: { course_id_1: {role1, role2, role3}, # Set of roles associated with course_id_1 course_id_2: {role4, role5, role6}, # Set of roles associated with course_id_2 [ROLE_CACHE_UNGROUPED_ROLES_KEY]: {role7, role8} # Set of roles not tied to any specific course or library. For example, Global Staff roles. }, user_id_2: { ... } # Similar structure for another user } While this changes the data structure used to store roles under the hood and adds the new property `roles_by_course_id` to the RoleCache, when initializing the RoleCache will store roles additionally in the previous data structure - as a flat set - in the `_roles` property accessible via `all_roles_set`. This establishes backwards compatibility. We are now storing roles twice in the RoleCache (in each of the two data structures), which means this takes twice as much memory, but only in the scope of a request. --- .../contentstore/rest_api/v1/views/home.py | 1 + cms/djangoapps/contentstore/views/course.py | 2 +- common/djangoapps/student/auth.py | 3 +- common/djangoapps/student/role_helpers.py | 2 +- common/djangoapps/student/roles.py | 85 ++++++++++++++++--- common/djangoapps/student/tests/test_roles.py | 84 +++++++++++++++++- 6 files changed, 157 insertions(+), 20 deletions(-) diff --git a/cms/djangoapps/contentstore/rest_api/v1/views/home.py b/cms/djangoapps/contentstore/rest_api/v1/views/home.py index b06cec44b7d3..d41ceb2647c5 100644 --- a/cms/djangoapps/contentstore/rest_api/v1/views/home.py +++ b/cms/djangoapps/contentstore/rest_api/v1/views/home.py @@ -215,4 +215,5 @@ def get(self, request: Request): library_context = get_library_context(request) serializer = LibraryTabSerializer(library_context) + return Response(serializer.data) diff --git a/cms/djangoapps/contentstore/views/course.py b/cms/djangoapps/contentstore/views/course.py index b56c989b411e..dce82809dfad 100644 --- a/cms/djangoapps/contentstore/views/course.py +++ b/cms/djangoapps/contentstore/views/course.py @@ -52,7 +52,7 @@ CourseStaffRole, GlobalStaff, UserBasedRole, - OrgStaffRole + OrgStaffRole, ) from common.djangoapps.util.json_request import JsonResponse, JsonResponseBadRequest, expect_json from common.djangoapps.util.string_utils import _has_non_ascii_characters diff --git a/common/djangoapps/student/auth.py b/common/djangoapps/student/auth.py index fb2999eca98d..08b4255feeae 100644 --- a/common/djangoapps/student/auth.py +++ b/common/djangoapps/student/auth.py @@ -23,7 +23,7 @@ OrgContentCreatorRole, OrgInstructorRole, OrgLibraryUserRole, - OrgStaffRole + OrgStaffRole, ) # Studio permissions: @@ -106,6 +106,7 @@ def get_user_permissions(user, course_key, org=None): # Staff have all permissions except EDIT_ROLES: if OrgStaffRole(org=org).has_user(user) or (course_key and user_has_role(user, CourseStaffRole(course_key))): return STUDIO_VIEW_USERS | STUDIO_EDIT_CONTENT | STUDIO_VIEW_CONTENT + # Otherwise, for libraries, users can view only: if course_key and isinstance(course_key, LibraryLocator): if OrgLibraryUserRole(org=org).has_user(user) or user_has_role(user, LibraryUserRole(course_key)): diff --git a/common/djangoapps/student/role_helpers.py b/common/djangoapps/student/role_helpers.py index 8a12bfa0ac90..64ed5cc17efb 100644 --- a/common/djangoapps/student/role_helpers.py +++ b/common/djangoapps/student/role_helpers.py @@ -75,4 +75,4 @@ def get_course_roles(user: User) -> list[CourseAccessRole]: """ # pylint: disable=protected-access role_cache = get_role_cache(user) - return list(role_cache._roles) + return list(role_cache.all_roles_set) diff --git a/common/djangoapps/student/roles.py b/common/djangoapps/student/roles.py index 7bbd0cf92454..971433c9c523 100644 --- a/common/djangoapps/student/roles.py +++ b/common/djangoapps/student/roles.py @@ -4,9 +4,9 @@ """ +from collections import defaultdict import logging from abc import ABCMeta, abstractmethod -from collections import defaultdict from contextlib import contextmanager from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user @@ -23,6 +23,9 @@ # A mapping of roles to the roles that they inherit permissions from. ACCESS_ROLES_INHERITANCE = {} +# The key used to store roles for a user in the cache that do not belong to a course or do not have a course id. +ROLE_CACHE_UNGROUPED_ROLES__KEY = 'ungrouped' + def register_access_role(cls): """ @@ -60,21 +63,52 @@ def strict_role_checking(): ACCESS_ROLES_INHERITANCE.update(OLD_ACCESS_ROLES_INHERITANCE) +def get_role_cache_key_for_course(course_key=None): + """ + Get the cache key for the course key. + """ + return str(course_key) if course_key else ROLE_CACHE_UNGROUPED_ROLES__KEY + + class BulkRoleCache: # lint-amnesty, pylint: disable=missing-class-docstring + """ + This class provides a caching mechanism for roles grouped by users and courses, + using a nested dictionary structure to optimize lookup performance. The cache structure is designed as follows: + + { + user_id_1: { + course_id_1: {role1, role2, role3}, # Set of roles associated with course_id_1 + course_id_2: {role4, role5, role6}, # Set of roles associated with course_id_2 + [ROLE_CACHE_UNGROUPED_ROLES_KEY]: {role7, role8} # Set of roles not tied to any specific course or library + }, + user_id_2: { ... } # Similar structure for another user + } + + - Each top-level dictionary entry keys by `user_id` to access role data for a specific user. + - Nested within each user's dictionary, entries are keyed by `course_id` grouping roles by course. + - The special key `ROLE_CACHE_UNGROUPED_ROLES_KEY` (a constant defined above) + stores roles that are not associated with any specific course or library. + """ + CACHE_NAMESPACE = "student.roles.BulkRoleCache" CACHE_KEY = 'roles_by_user' @classmethod def prefetch(cls, users): # lint-amnesty, pylint: disable=missing-function-docstring - roles_by_user = defaultdict(set) + roles_by_user = defaultdict(lambda: defaultdict(set)) get_cache(cls.CACHE_NAMESPACE)[cls.CACHE_KEY] = roles_by_user for role in CourseAccessRole.objects.filter(user__in=users).select_related('user'): - roles_by_user[role.user.id].add(role) + user_id = role.user.id + course_id = get_role_cache_key_for_course(role.course_id) + + # Add role to the set in roles_by_user[user_id][course_id] + user_roles_set_for_course = roles_by_user[user_id][course_id] + user_roles_set_for_course.add(role) users_without_roles = [u for u in users if u.id not in roles_by_user] for user in users_without_roles: - roles_by_user[user.id] = set() + roles_by_user[user.id] = {} @classmethod def get_user_roles(cls, user): @@ -83,15 +117,32 @@ def get_user_roles(cls, user): class RoleCache: """ - A cache of the CourseAccessRoles held by a particular user + A cache of the CourseAccessRoles held by a particular user. + Internal data structures should be accessed by getter and setter methods; + don't use `_roles_by_course_id` or `_roles` directly. + _roles_by_course_id: This is the data structure as saved in the RequestCache. + It contains all roles for a user as a dict that's keyed by course_id. + The key ROLE_CACHE_UNGROUPED_ROLES__KEY is used for all roles + that are not associated with a course. + _roles: This is a set of all roles for a user, ungrouped. It's used for some types of + lookups and collected from _roles_by_course_id on initialization + so that it doesn't need to be recalculated. + """ def __init__(self, user): try: - self._roles = BulkRoleCache.get_user_roles(user) + self._roles_by_course_id = BulkRoleCache.get_user_roles(user) except KeyError: - self._roles = set( - CourseAccessRole.objects.filter(user=user).all() - ) + self._roles_by_course_id = {} + roles = CourseAccessRole.objects.filter(user=user).all() + for role in roles: + course_id = get_role_cache_key_for_course(role.course_id) + if not self._roles_by_course_id.get(course_id): + self._roles_by_course_id[course_id] = set() + self._roles_by_course_id[course_id].add(role) + self._roles = set() + for roles_for_course in self._roles_by_course_id.values(): + self._roles.update(roles_for_course) @staticmethod def get_roles(role): @@ -100,16 +151,24 @@ def get_roles(role): """ return ACCESS_ROLES_INHERITANCE.get(role, set()) | {role} + @property + def all_roles_set(self): + return self._roles + + @property + def roles_by_course_id(self): + return self._roles_by_course_id + def has_role(self, role, course_id, org): """ Return whether this RoleCache contains a role with the specified role or a role that inherits from the specified role, course_id and org. """ + course_id_string = get_role_cache_key_for_course(course_id) + course_roles = self._roles_by_course_id.get(course_id_string, []) return any( - access_role.role in self.get_roles(role) and - access_role.course_id == course_id and - access_role.org == org - for access_role in self._roles + access_role.role in self.get_roles(role) and access_role.org == org + for access_role in course_roles ) diff --git a/common/djangoapps/student/tests/test_roles.py b/common/djangoapps/student/tests/test_roles.py index 9037eb902f61..da1aad19a803 100644 --- a/common/djangoapps/student/tests/test_roles.py +++ b/common/djangoapps/student/tests/test_roles.py @@ -6,6 +6,7 @@ import ddt from django.test import TestCase from opaque_keys.edx.keys import CourseKey +from opaque_keys.edx.locator import LibraryLocator from common.djangoapps.student.roles import ( CourseAccessRole, @@ -22,7 +23,9 @@ OrgContentCreatorRole, OrgInstructorRole, OrgStaffRole, - RoleCache + RoleCache, + get_role_cache_key_for_course, + ROLE_CACHE_UNGROUPED_ROLES__KEY ) from common.djangoapps.student.role_helpers import get_course_roles, has_staff_roles from common.djangoapps.student.tests.factories import AnonymousUserFactory, InstructorFactory, StaffFactory, UserFactory @@ -35,7 +38,7 @@ class RolesTestCase(TestCase): def setUp(self): super().setUp() - self.course_key = CourseKey.from_string('edX/toy/2012_Fall') + self.course_key = CourseKey.from_string('course-v1:course-v1:edX+toy+2012_Fall') self.course_loc = self.course_key.make_usage_key('course', '2012_Fall') self.anonymous_user = AnonymousUserFactory() self.student = UserFactory() @@ -189,8 +192,9 @@ def test_get_orgs_for_user(self): @ddt.ddt class RoleCacheTestCase(TestCase): # lint-amnesty, pylint: disable=missing-class-docstring - IN_KEY = CourseKey.from_string('edX/toy/2012_Fall') - NOT_IN_KEY = CourseKey.from_string('edX/toy/2013_Fall') + IN_KEY_STRING = 'course-v1:edX+toy+2012_Fall' + IN_KEY = CourseKey.from_string(IN_KEY_STRING) + NOT_IN_KEY = CourseKey.from_string('course-v1:edX+toy+2013_Fall') ROLES = ( (CourseStaffRole(IN_KEY), ('staff', IN_KEY, 'edX')), @@ -233,3 +237,75 @@ def test_only_in_role(self, role, target): def test_empty_cache(self, role, target): # lint-amnesty, pylint: disable=unused-argument cache = RoleCache(self.user) assert not cache.has_role(*target) + + def test_get_role_cache_key_for_course_for_course_object_gets_string(self): + """ + Given a valid course key object, get_role_cache_key_for_course + should return the string representation of the key. + """ + course_string = 'course-v1:edX+toy+2012_Fall' + key = CourseKey.from_string(course_string) + key = get_role_cache_key_for_course(key) + assert key == course_string + + def test_get_role_cache_key_for_course_for_undefined_object_returns_default(self): + """ + Given a value None, get_role_cache_key_for_course + should return the default key for ungrouped courses. + """ + key = get_role_cache_key_for_course(None) + assert key == ROLE_CACHE_UNGROUPED_ROLES__KEY + + def test_role_cache_get_roles_set(self): + """ + Test that the RoleCache.all_roles_set getter method returns a flat set of all roles for a user + and that the ._roles attribute is the same as the set to avoid legacy behavior being broken. + """ + lib0 = LibraryLocator.from_string('library-v1:edX+quizzes') + course0 = CourseKey.from_string('course-v1:edX+toy+2012_Summer') + course1 = CourseKey.from_string('course-v1:edX+toy2+2013_Fall') + role_library_v1 = LibraryUserRole(lib0) + role_course_0 = CourseInstructorRole(course0) + role_course_1 = CourseInstructorRole(course1) + + role_library_v1.add_users(self.user) + role_course_0.add_users(self.user) + role_course_1.add_users(self.user) + + cache = RoleCache(self.user) + assert cache.has_role('library_user', lib0, 'edX') + assert cache.has_role('instructor', course0, 'edX') + assert cache.has_role('instructor', course1, 'edX') + + assert len(cache.all_roles_set) == 3 + roles_set = cache.all_roles_set + for role in roles_set: + assert role.course_id.course in ('quizzes', 'toy2', 'toy') + + assert roles_set == cache._roles # pylint: disable=protected-access + + def test_role_cache_roles_by_course_id(self): + """ + Test that the RoleCache.roles_by_course_id getter method returns a dictionary of roles for a user + that are grouped by course_id or if ungrouped by the ROLE_CACHE_UNGROUPED_ROLES__KEY. + """ + lib0 = LibraryLocator.from_string('library-v1:edX+quizzes') + course0 = CourseKey.from_string('course-v1:edX+toy+2012_Summer') + course1 = CourseKey.from_string('course-v1:edX+toy2+2013_Fall') + role_library_v1 = LibraryUserRole(lib0) + role_course_0 = CourseInstructorRole(course0) + role_course_1 = CourseInstructorRole(course1) + role_org_staff = OrgStaffRole('edX') + + role_library_v1.add_users(self.user) + role_course_0.add_users(self.user) + role_course_1.add_users(self.user) + role_org_staff.add_users(self.user) + + cache = RoleCache(self.user) + roles_dict = cache.roles_by_course_id + assert len(roles_dict) == 4 + assert roles_dict.get(ROLE_CACHE_UNGROUPED_ROLES__KEY).pop().role == 'staff' + assert roles_dict.get('library-v1:edX+quizzes').pop().course_id.course == 'quizzes' + assert roles_dict.get('course-v1:edX+toy+2012_Summer').pop().course_id.course == 'toy' + assert roles_dict.get('course-v1:edX+toy2+2013_Fall').pop().course_id.course == 'toy2'