Skip to content

Commit

Permalink
feat: use course cache to retrieve course data
Browse files Browse the repository at this point in the history
  • Loading branch information
alangsto committed Feb 26, 2024
1 parent fddf0bc commit e60d711
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 38 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Change Log
Unreleased
**********

4.1.0 - 2024-02-26
******************
* Use course cache to inject course title and course skill names into prompt template.

4.0.0 - 2024-02-21
******************
* Remove use of course waffle flag. Use the django setting LEARNING_ASSISTANT_AVAILABLE
Expand Down
37 changes: 37 additions & 0 deletions docs/decisions/0004-course-cache-use.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
4. Use of the LMS course caches
###############################

Status
******

**Accepted** *2024-02-26*

Context
*******
Each course run ID in edx-platform is associated with another course ID. While for many courses, the mapping between
course run ID and course ID is straight forward, i.e. edX+testX+2019 -> edX+testX, this is not the case for every
course on edX. The discovery service is the source of truth for mappings between course run ID and course IDs, and
string manipulation cannot be relied on as an accurate way to map between the two forms of ID.

The learning-assistant CourseChatView NEED LINK accepts a course run ID as a query parameter, but parts of the python API defined
in the learning assistant backend also require the course ID associated with a given course.

Decision
********
In order to determine the mapping between a course run ID and course ID in the learning-assistant app, we will make
use of an existing cache that is defined in edx-platform (https://github.com/openedx/edx-platform/blob/c61df904c1d2a5f523f1da44460c21e17ec087ee/openedx/core/djangoapps/catalog/utils.py#L801).
This is a long term cache with a TTL of 24 hours, and on a cache miss this code will call the discovery API directly.

Consequences
************
* If the cache were to be removed, code in the learning-assistant repository would no longer function as expected.
* On a cache miss, the learning-assistant backend will incur additional performance cost on calls to the discovery API.

Rejected Alternatives
*********************
* Calling the discovery API directly from the learning-assistant backend
* This would require building a custom solution in the learning-assistant app to call the discovery service directly.
* Without a cache, this would impact performance on every call to the learning-assistant backend.
* Using string manipulation to map course run ID to course ID.
* If we do not use the discovery service as our source of truth for course run ID to course ID mappings,
we run the risk of being unable to support courses that do not fit the usual pattern mapping.
2 changes: 1 addition & 1 deletion learning_assistant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
Plugin for a learning assistant backend, intended for use within edx-platform.
"""

__version__ = '4.0.0'
__version__ = '4.1.0'

default_app_config = 'learning_assistant.apps.LearningAssistantConfig' # pylint: disable=invalid-name
23 changes: 19 additions & 4 deletions learning_assistant/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from learning_assistant.platform_imports import (
block_get_children,
block_leaf_filter,
get_cache_course_data,
get_cache_course_run_data,
get_single_block,
get_text_transcript,
traverse_block_pre_order,
Expand Down Expand Up @@ -101,19 +103,23 @@ def get_block_content(request, user_id, course_id, unit_usage_key):
return cache_data['content_length'], cache_data['content_items']


def render_prompt_template(request, user_id, course_id, unit_usage_key):
def render_prompt_template(request, user_id, course_run_id, unit_usage_key, course_id):
"""
Return a rendered prompt template, specified by the LEARNING_ASSISTANT_PROMPT_TEMPLATE setting.
"""
unit_content = ''

course_run_key = CourseKey.from_string(course_id)
course_run_key = CourseKey.from_string(course_run_id)
if unit_usage_key and course_content_enabled(course_run_key):
_, unit_content = get_block_content(request, user_id, course_id, unit_usage_key)
_, unit_content = get_block_content(request, user_id, course_run_id, unit_usage_key)

course_data = get_cache_course_data(course_id, ['skill_names', 'title'])
skill_names = course_data['skill_names']
title = course_data['title']

template_string = getattr(settings, 'LEARNING_ASSISTANT_PROMPT_TEMPLATE', '')
template = Environment(loader=BaseLoader).from_string(template_string)
data = template.render(unit_content=unit_content)
data = template.render(unit_content=unit_content, skill_names=skill_names, title=title)
return data


Expand Down Expand Up @@ -168,3 +174,12 @@ def set_learning_assistant_enabled(course_key, enabled):
course_key=obj.course_id,
enabled=obj.enabled
)


def get_course_id(course_run_id):
"""
Given a course run id (str), return the associated course key.
"""
course_data = get_cache_course_run_data(course_run_id, ['course'])
course_key = course_data['course']
return course_key
16 changes: 14 additions & 2 deletions learning_assistant/platform_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,26 @@ def get_cache_course_run_data(course_run_id, fields):
"""
Return course run related data given a course run id.
This function makes use of the discovery course run cache, which is necessary because
only the discovery service stores the relation between courseruns and courses.
This function makes use of the course run cache in the LMS, which caches data from the discovery service. This is
necessary because only the discovery service stores the relation between courseruns and courses.
"""
# pylint: disable=import-error, import-outside-toplevel
from openedx.core.djangoapps.catalog.utils import get_course_run_data
return get_course_run_data(course_run_id, fields)


def get_cache_course_data(course_id, fields):
"""
Return course related data given a course id.
This function makes use of the course cache in the LMS, which caches data from the discovery service. This is
necessary because only the discovery service stores course skills data.
"""
# pylint: disable=import-error, import-outside-toplevel
from openedx.core.djangoapps.catalog.utils import get_course_data
return get_course_data(course_id, fields)


def get_user_role(user, course_key):
"""
Return the role of the user on the edX platform.
Expand Down
18 changes: 4 additions & 14 deletions learning_assistant/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from requests.exceptions import ConnectTimeout
from rest_framework import status as http_status

from learning_assistant.platform_imports import get_cache_course_run_data

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -60,24 +59,15 @@ def get_reduced_message_list(prompt_template, message_list):
return new_message_list


def get_course_id(course_run_id):
"""
Given a course run id (str), return the associated course key.
"""
course_data = get_cache_course_run_data(course_run_id, ['course'])
course_key = course_data['course']
return course_key


def create_request_body(prompt_template, message_list, courserun_id):
def create_request_body(prompt_template, message_list, course_id):
"""
Form request body to be passed to the chat endpoint.
"""
response_body = {
'context': {
'content': prompt_template,
'render': {
'doc_id': get_course_id(courserun_id),
'doc_id': course_id,
'fields': ['skillNames', 'title']
}
},
Expand All @@ -87,7 +77,7 @@ def create_request_body(prompt_template, message_list, courserun_id):
return response_body


def get_chat_response(prompt_template, message_list, courserun_id):
def get_chat_response(prompt_template, message_list, course_id):
"""
Pass message list to chat endpoint, as defined by the CHAT_COMPLETION_API setting.
"""
Expand All @@ -98,7 +88,7 @@ def get_chat_response(prompt_template, message_list, courserun_id):
connect_timeout = getattr(settings, 'CHAT_COMPLETION_API_CONNECT_TIMEOUT', 1)
read_timeout = getattr(settings, 'CHAT_COMPLETION_API_READ_TIMEOUT', 15)

body = create_request_body(prompt_template, message_list, courserun_id)
body = create_request_body(prompt_template, message_list, course_id)

try:
response = requests.post(
Expand Down
9 changes: 7 additions & 2 deletions learning_assistant/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
except ImportError:
pass

from learning_assistant.api import learning_assistant_enabled, render_prompt_template
from learning_assistant.api import get_course_id, learning_assistant_enabled, render_prompt_template
from learning_assistant.serializers import MessageSerializer
from learning_assistant.utils import get_chat_response, user_role_is_staff

Expand Down Expand Up @@ -93,7 +93,12 @@ def post(self, request, course_id):
}
)

prompt_template = render_prompt_template(request, request.user.id, course_id, unit_id)
# the "course id" being passed in as a query parameter is really a course run id. For the sake of
# clarity here, the value is reassigned to a more aptly named variable.
course_run_id = course_id
course_id = get_course_id(course_run_id)

prompt_template = render_prompt_template(request, request.user.id, course_run_id, unit_id, course_id)

status_code, message = get_chat_response(prompt_template, message_list, course_id)

Expand Down
15 changes: 10 additions & 5 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def setUp(self):
]
self.block = FakeBlock(self.children)

self.course_id = 'course-v1:edx+test+23'
self.course_run_id = 'course-v1:edx+test+23'

@ddt.data(
('video', True),
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_get_block_content(self, mock_get_children_contents, mock_get_single_blo
# args does not matter for this test right now, as the `get_single_block` function is entirely mocked.
request = MagicMock()
user_id = 1
course_id = self.course_id
course_id = self.course_run_id
unit_usage_key = 'block-v1:edX+A+B+type@vertical+block@verticalD'

length, items = get_block_content(request, user_id, course_id, unit_usage_key)
Expand All @@ -183,20 +183,25 @@ def test_get_block_content(self, mock_get_children_contents, mock_get_single_blo
('', False),
)
@ddt.unpack
@patch('learning_assistant.api.get_cache_course_data')
@patch('learning_assistant.toggles._is_learning_assistant_waffle_flag_enabled')
@patch('learning_assistant.api.get_block_content')
def test_render_prompt_template(self, unit_content, flag_enabled, mock_get_content, mock_is_flag_enabled):
def test_render_prompt_template(
self, unit_content, flag_enabled, mock_get_content, mock_is_flag_enabled, mock_cache
):
mock_get_content.return_value = (len(unit_content), unit_content)
mock_is_flag_enabled.return_value = flag_enabled
mock_cache.return_value = {'skill_names': ['skills'], 'title': ['title']}

# mock arguments that are passed through to `get_block_content` function. the value of these
# args does not matter for this test right now, as the `get_block_content` function is entirely mocked.
request = MagicMock()
user_id = 1
course_id = self.course_id
course_run_id = self.course_run_id
unit_usage_key = 'block-v1:edX+A+B+type@vertical+block@verticalD'
course_id = 'edx+test'

prompt_text = render_prompt_template(request, user_id, course_id, unit_usage_key)
prompt_text = render_prompt_template(request, user_id, course_run_id, unit_usage_key, course_id)

if unit_content and flag_enabled:
self.assertIn(unit_content, prompt_text)
Expand Down
11 changes: 2 additions & 9 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,7 @@ def setUp(self):
self.prompt_template = 'This is a prompt.'

self.message_list = [{'role': 'assistant', 'content': 'Hello'}, {'role': 'user', 'content': 'Goodbye'}]
self.course_id = 'course-v1:edx+test+23'
self.course_key = 'edx-test'

self.patcher = patch(
'learning_assistant.utils.get_cache_course_run_data',
return_value={'course': self.course_key}
)
self.patcher.start()
self.course_id = 'edx+test'

def get_response(self):
return get_chat_response(self.prompt_template, self.message_list, self.course_id)
Expand Down Expand Up @@ -99,7 +92,7 @@ def test_post_request_structure(self, mock_requests):
'context': {
'content': self.prompt_template,
'render': {
'doc_id': self.course_key,
'doc_id': self.course_id,
'fields': ['skillNames', 'title']
}
},
Expand Down
6 changes: 5 additions & 1 deletion tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,17 +145,21 @@ def test_invalid_messages(self, mock_role, mock_waffle, mock_render):

@patch('learning_assistant.views.render_prompt_template')
@patch('learning_assistant.views.get_chat_response')
@patch('learning_assistant.views.get_course_id')
@patch('learning_assistant.views.learning_assistant_enabled')
@patch('learning_assistant.views.get_user_role')
@patch('learning_assistant.views.CourseEnrollment.get_enrollment')
@patch('learning_assistant.views.CourseMode')
def test_chat_response(self, mock_mode, mock_enrollment, mock_role, mock_waffle, mock_chat_response, mock_render):
def test_chat_response(
self, mock_mode, mock_enrollment, mock_role, mock_waffle, mock_course_id, mock_chat_response, mock_render
):
mock_waffle.return_value = True
mock_role.return_value = 'student'
mock_mode.VERIFIED_MODES = ['verified']
mock_enrollment.return_value = MagicMock(mode='verified')
mock_chat_response.return_value = (200, {'role': 'assistant', 'content': 'Something else'})
mock_render.return_value = 'This is a template'
mock_course_id.return_value = 'test+edx'
test_unit_id = 'test-unit-id'

test_data = [
Expand Down

0 comments on commit e60d711

Please sign in to comment.