feat: use course cache to retrieve course data

edx · Feb 26, 2024 · e60d711 · e60d711
1 parent fddf0bc
commit e60d711
Show file tree

Hide file tree

Showing 10 changed files with 103 additions and 38 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -14,6 +14,10 @@ Change Log
 Unreleased
 **********
 
+4.1.0 - 2024-02-26
+******************
+* Use course cache to inject course title and course skill names into prompt template.
+
 4.0.0 - 2024-02-21
 ******************
 * Remove use of course waffle flag. Use the django setting LEARNING_ASSISTANT_AVAILABLE

diff --git a/docs/decisions/0004-course-cache-use.rst b/docs/decisions/0004-course-cache-use.rst
@@ -0,0 +1,37 @@
+4. Use of the LMS course caches
+###############################
+
+Status
+******
+
+**Accepted** *2024-02-26*
+
+Context
+*******
+Each course run ID in edx-platform is associated with another course ID. While for many courses, the mapping between
+course run ID and course ID is straight forward, i.e. edX+testX+2019 -> edX+testX, this is not the case for every
+course on edX. The discovery service is the source of truth for mappings between course run ID and course IDs, and
+string manipulation cannot be relied on as an accurate way to map between the two forms of ID.
+
+The learning-assistant CourseChatView NEED LINK accepts a course run ID as a query parameter, but parts of the python API defined
+in the learning assistant backend also require the course ID associated with a given course.
+
+Decision
+********
+In order to determine the mapping between a course run ID and course ID in the learning-assistant app, we will make
+use of an existing cache that is defined in edx-platform (https://github.com/openedx/edx-platform/blob/c61df904c1d2a5f523f1da44460c21e17ec087ee/openedx/core/djangoapps/catalog/utils.py#L801).
+This is a long term cache with a TTL of 24 hours, and on a cache miss this code will call the discovery API directly.
+
+Consequences
+************
+* If the cache were to be removed, code in the learning-assistant repository would no longer function as expected.
+* On a cache miss, the learning-assistant backend will incur additional performance cost on calls to the discovery API.
+
+Rejected Alternatives
+*********************
+* Calling the discovery API directly from the learning-assistant backend
+    * This would require building a custom solution in the learning-assistant app to call the discovery service directly.
+    * Without a cache, this would impact performance on every call to the learning-assistant backend.
+* Using string manipulation to map course run ID to course ID.
+    * If we do not use the discovery service as our source of truth for course run ID to course ID mappings,
+      we run the risk of being unable to support courses that do not fit the usual pattern mapping.
diff --git a/learning_assistant/__init__.py b/learning_assistant/__init__.py
@@ -2,6 +2,6 @@
 Plugin for a learning assistant backend, intended for use within edx-platform.
 """
 
-__version__ = '4.0.0'
+__version__ = '4.1.0'
 
 default_app_config = 'learning_assistant.apps.LearningAssistantConfig'  # pylint: disable=invalid-name
diff --git a/learning_assistant/api.py b/learning_assistant/api.py
@@ -15,6 +15,8 @@
 from learning_assistant.platform_imports import (
     block_get_children,
     block_leaf_filter,
+    get_cache_course_data,
+    get_cache_course_run_data,
     get_single_block,
     get_text_transcript,
     traverse_block_pre_order,
@@ -101,19 +103,23 @@ def get_block_content(request, user_id, course_id, unit_usage_key):
     return cache_data['content_length'], cache_data['content_items']
 
 
-def render_prompt_template(request, user_id, course_id, unit_usage_key):
+def render_prompt_template(request, user_id, course_run_id, unit_usage_key, course_id):
     """
     Return a rendered prompt template, specified by the LEARNING_ASSISTANT_PROMPT_TEMPLATE setting.
     """
     unit_content = ''
 
-    course_run_key = CourseKey.from_string(course_id)
+    course_run_key = CourseKey.from_string(course_run_id)
     if unit_usage_key and course_content_enabled(course_run_key):
-        _, unit_content = get_block_content(request, user_id, course_id, unit_usage_key)
+        _, unit_content = get_block_content(request, user_id, course_run_id, unit_usage_key)
+
+    course_data = get_cache_course_data(course_id, ['skill_names', 'title'])
+    skill_names = course_data['skill_names']
+    title = course_data['title']
 
     template_string = getattr(settings, 'LEARNING_ASSISTANT_PROMPT_TEMPLATE', '')
     template = Environment(loader=BaseLoader).from_string(template_string)
-    data = template.render(unit_content=unit_content)
+    data = template.render(unit_content=unit_content, skill_names=skill_names, title=title)
     return data
 
 
@@ -168,3 +174,12 @@ def set_learning_assistant_enabled(course_key, enabled):
         course_key=obj.course_id,
         enabled=obj.enabled
     )
+
+
+def get_course_id(course_run_id):
+    """
+    Given a course run id (str), return the associated course key.
+    """
+    course_data = get_cache_course_run_data(course_run_id, ['course'])
+    course_key = course_data['course']
+    return course_key
diff --git a/learning_assistant/platform_imports.py b/learning_assistant/platform_imports.py
@@ -51,14 +51,26 @@ def get_cache_course_run_data(course_run_id, fields):
     """
     Return course run related data given a course run id.
 
-    This function makes use of the discovery course run cache, which is necessary because
-    only the discovery service stores the relation between courseruns and courses.
+    This function makes use of the course run cache in the LMS, which caches data from the discovery service. This is
+    necessary because only the discovery service stores the relation between courseruns and courses.
     """
     # pylint: disable=import-error, import-outside-toplevel
     from openedx.core.djangoapps.catalog.utils import get_course_run_data
     return get_course_run_data(course_run_id, fields)
 
 
+def get_cache_course_data(course_id, fields):
+    """
+    Return course related data given a course id.
+
+    This function makes use of the course cache in the LMS, which caches data from the discovery service. This is
+    necessary because only the discovery service stores course skills data.
+    """
+    # pylint: disable=import-error, import-outside-toplevel
+    from openedx.core.djangoapps.catalog.utils import get_course_data
+    return get_course_data(course_id, fields)
+
+
 def get_user_role(user, course_key):
     """
     Return the role of the user on the edX platform.

diff --git a/learning_assistant/utils.py b/learning_assistant/utils.py
@@ -10,7 +10,6 @@
 from requests.exceptions import ConnectTimeout
 from rest_framework import status as http_status
 
-from learning_assistant.platform_imports import get_cache_course_run_data
 
 log = logging.getLogger(__name__)
 
@@ -60,24 +59,15 @@ def get_reduced_message_list(prompt_template, message_list):
     return new_message_list
 
 
-def get_course_id(course_run_id):
-    """
-    Given a course run id (str), return the associated course key.
-    """
-    course_data = get_cache_course_run_data(course_run_id, ['course'])
-    course_key = course_data['course']
-    return course_key
-
-
-def create_request_body(prompt_template, message_list, courserun_id):
+def create_request_body(prompt_template, message_list, course_id):
     """
     Form request body to be passed to the chat endpoint.
     """
     response_body = {
         'context': {
             'content': prompt_template,
             'render': {
-                'doc_id': get_course_id(courserun_id),
+                'doc_id': course_id,
                 'fields': ['skillNames', 'title']
             }
         },
@@ -87,7 +77,7 @@ def create_request_body(prompt_template, message_list, courserun_id):
     return response_body
 
 
-def get_chat_response(prompt_template, message_list, courserun_id):
+def get_chat_response(prompt_template, message_list, course_id):
     """
     Pass message list to chat endpoint, as defined by the CHAT_COMPLETION_API setting.
     """
@@ -98,7 +88,7 @@ def get_chat_response(prompt_template, message_list, courserun_id):
         connect_timeout = getattr(settings, 'CHAT_COMPLETION_API_CONNECT_TIMEOUT', 1)
         read_timeout = getattr(settings, 'CHAT_COMPLETION_API_READ_TIMEOUT', 15)
 
-        body = create_request_body(prompt_template, message_list, courserun_id)
+        body = create_request_body(prompt_template, message_list, course_id)
 
         try:
             response = requests.post(

diff --git a/learning_assistant/views.py b/learning_assistant/views.py
@@ -19,7 +19,7 @@
 except ImportError:
     pass
 
-from learning_assistant.api import learning_assistant_enabled, render_prompt_template
+from learning_assistant.api import get_course_id, learning_assistant_enabled, render_prompt_template
 from learning_assistant.serializers import MessageSerializer
 from learning_assistant.utils import get_chat_response, user_role_is_staff
 
@@ -93,7 +93,12 @@ def post(self, request, course_id):
             }
         )
 
-        prompt_template = render_prompt_template(request, request.user.id, course_id, unit_id)
+        # the "course id" being passed in as a query parameter is really a course run id. For the sake of
+        # clarity here, the value is reassigned to a more aptly named variable.
+        course_run_id = course_id
+        course_id = get_course_id(course_run_id)
+
+        prompt_template = render_prompt_template(request, request.user.id, course_run_id, unit_id, course_id)
 
         status_code, message = get_chat_response(prompt_template, message_list, course_id)
 

diff --git a/tests/test_api.py b/tests/test_api.py
@@ -82,7 +82,7 @@ def setUp(self):
         ]
         self.block = FakeBlock(self.children)
 
-        self.course_id = 'course-v1:edx+test+23'
+        self.course_run_id = 'course-v1:edx+test+23'
 
     @ddt.data(
         ('video', True),
@@ -156,7 +156,7 @@ def test_get_block_content(self, mock_get_children_contents, mock_get_single_blo
         # args does not matter for this test right now, as the `get_single_block` function is entirely mocked.
         request = MagicMock()
         user_id = 1
-        course_id = self.course_id
+        course_id = self.course_run_id
         unit_usage_key = 'block-v1:edX+A+B+type@vertical+block@verticalD'
 
         length, items = get_block_content(request, user_id, course_id, unit_usage_key)
@@ -183,20 +183,25 @@ def test_get_block_content(self, mock_get_children_contents, mock_get_single_blo
         ('', False),
     )
     @ddt.unpack
+    @patch('learning_assistant.api.get_cache_course_data')
     @patch('learning_assistant.toggles._is_learning_assistant_waffle_flag_enabled')
     @patch('learning_assistant.api.get_block_content')
-    def test_render_prompt_template(self, unit_content, flag_enabled, mock_get_content, mock_is_flag_enabled):
+    def test_render_prompt_template(
+        self, unit_content, flag_enabled, mock_get_content, mock_is_flag_enabled, mock_cache
+    ):
         mock_get_content.return_value = (len(unit_content), unit_content)
         mock_is_flag_enabled.return_value = flag_enabled
+        mock_cache.return_value = {'skill_names': ['skills'], 'title': ['title']}
 
         # mock arguments that are passed through to `get_block_content` function. the value of these
         # args does not matter for this test right now, as the `get_block_content` function is entirely mocked.
         request = MagicMock()
         user_id = 1
-        course_id = self.course_id
+        course_run_id = self.course_run_id
         unit_usage_key = 'block-v1:edX+A+B+type@vertical+block@verticalD'
+        course_id = 'edx+test'
 
-        prompt_text = render_prompt_template(request, user_id, course_id, unit_usage_key)
+        prompt_text = render_prompt_template(request, user_id, course_run_id, unit_usage_key, course_id)
 
         if unit_content and flag_enabled:
             self.assertIn(unit_content, prompt_text)

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -24,14 +24,7 @@ def setUp(self):
         self.prompt_template = 'This is a prompt.'
 
         self.message_list = [{'role': 'assistant', 'content': 'Hello'}, {'role': 'user', 'content': 'Goodbye'}]
-        self.course_id = 'course-v1:edx+test+23'
-        self.course_key = 'edx-test'
-
-        self.patcher = patch(
-            'learning_assistant.utils.get_cache_course_run_data',
-            return_value={'course': self.course_key}
-        )
-        self.patcher.start()
+        self.course_id = 'edx+test'
 
     def get_response(self):
         return get_chat_response(self.prompt_template, self.message_list, self.course_id)
@@ -99,7 +92,7 @@ def test_post_request_structure(self, mock_requests):
             'context': {
                 'content': self.prompt_template,
                 'render': {
-                    'doc_id': self.course_key,
+                    'doc_id': self.course_id,
                     'fields': ['skillNames', 'title']
                 }
             },

diff --git a/tests/test_views.py b/tests/test_views.py
@@ -145,17 +145,21 @@ def test_invalid_messages(self, mock_role, mock_waffle, mock_render):
 
     @patch('learning_assistant.views.render_prompt_template')
     @patch('learning_assistant.views.get_chat_response')
+    @patch('learning_assistant.views.get_course_id')
     @patch('learning_assistant.views.learning_assistant_enabled')
     @patch('learning_assistant.views.get_user_role')
     @patch('learning_assistant.views.CourseEnrollment.get_enrollment')
     @patch('learning_assistant.views.CourseMode')
-    def test_chat_response(self, mock_mode, mock_enrollment, mock_role, mock_waffle, mock_chat_response, mock_render):
+    def test_chat_response(
+        self, mock_mode, mock_enrollment, mock_role, mock_waffle, mock_course_id, mock_chat_response, mock_render
+    ):
         mock_waffle.return_value = True
         mock_role.return_value = 'student'
         mock_mode.VERIFIED_MODES = ['verified']
         mock_enrollment.return_value = MagicMock(mode='verified')
         mock_chat_response.return_value = (200, {'role': 'assistant', 'content': 'Something else'})
         mock_render.return_value = 'This is a template'
+        mock_course_id.return_value = 'test+edx'
         test_unit_id = 'test-unit-id'
 
         test_data = [