Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: normalize start/end/enroll_by dates across content types #666

Merged
merged 1 commit into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions enterprise_catalog/apps/api/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Constants for api app.
"""


class CourseMode():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit/inform]: Similar course mode constants are also defined under enterprise_catalog/apps/catalog/constants.py (e.g., CONTENT_COURSE_TYPE_ALLOW_LIST). We could likely consolidate to ensure that those hardcoded course modes also rely on this new CourseMode class.

That said, we might want to define this constant under the constants.py file under the catalog app, so they can be more readily shared across both the api and catalog apps.

I wouldn't say this is blocking feedback on this PR; likely something I can tackle in ENT-7548, which relates to ensuring these normalized dates get into Algolia so they're available across all course types.

"""
Content metadata course mode keys.

Copied from https://github.com/edx/edx-platform/blob/831a8bcc/common/djangoapps/course_modes/models.py#L164
"""
HONOR = "honor"
PROFESSIONAL = "professional"
VERIFIED = "verified"
AUDIT = "audit"
NO_ID_PROFESSIONAL_MODE = "no-id-professional"
CREDIT_MODE = "credit"
MASTERS = "masters"
EXECUTIVE_EDUCATION = "executive-education"
PAID_EXECUTIVE_EDUCATION = "paid-executive-education"
UNPAID_EXECUTIVE_EDUCATION = "unpaid-executive-education"
PAID_BOOTCAMP = "paid-bootcamp"
UNPAID_BOOTCAMP = "unpaid-bootcamp"
92 changes: 83 additions & 9 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from django_celery_results.models import TaskResult
from requests.exceptions import ConnectionError as RequestsConnectionError

from enterprise_catalog.apps.api.constants import CourseMode
from enterprise_catalog.apps.api_client.discovery import DiscoveryApiClient
from enterprise_catalog.apps.catalog.algolia_utils import (
ALGOLIA_FIELDS,
Expand Down Expand Up @@ -62,6 +63,16 @@
# ENT-4980 every batch "shard" record in Algolia should have all of these that pertain to the course
EXPLORE_CATALOG_TITLES = ['A la carte', 'Business', 'Education']

# The closer a mode is to the beginning of this list, the more likely a seat with that mode will be used to find the
# upgrade deadline for the course (and course run).
BEST_MODE_ORDER = [
CourseMode.VERIFIED,
CourseMode.PROFESSIONAL,
CourseMode.NO_ID_PROFESSIONAL_MODE,
CourseMode.UNPAID_EXECUTIVE_EDUCATION,
CourseMode.AUDIT,
]
pwnage101 marked this conversation as resolved.
Show resolved Hide resolved


def _fetch_courses_by_keys(course_keys):
"""
Expand Down Expand Up @@ -253,6 +264,68 @@ def update_full_content_metadata_task(self, force=False): # pylint: disable=unu
_update_full_content_metadata_program(content_keys)


def _find_best_mode_seat(seats):
"""
Find the seat with the "best" course mode. See BEST_MODE_ORDER to find which modes are best.
"""
sort_key_for_mode = {mode: index for (index, mode) in enumerate(BEST_MODE_ORDER)}

def sort_key(seat):
"""
Get a sort key (int) for a seat dictionary based on the position of its mode in the BEST_MODE_ORDER list.

Modes not found in the BEST_MODE_ORDER list get sorted to the end of the list.
"""
mode = seat['type']
return sort_key_for_mode.get(mode, len(sort_key_for_mode))

sorted_seats = sorted(seats, key=sort_key)
if sorted_seats:
return sorted_seats[0]
return None


def _normalize_course_metadata(course_metadata_record):
"""
Add normalized metadata keys with values calculated by normalizing existing keys. This will be helpful for
downstream consumers which no longer will need to do their own independent normalization.

At the time of writing, output normalized metadata keys incldue:

* normalized_metadata.start_date: When the course starts
* normalized_metadata.end_date: When the course ends
* normalized_metadata.enroll_by_date: The deadline for enrollment

Note that course-type-specific definitions of each of these keys may be more nuanced.
"""
json_meta = course_metadata_record.json_metadata
normalized_metadata = {}

# For each content type, find the values that correspond to the desired output key.
if course_metadata_record.is_exec_ed_2u_course:
# First case covers Exec Ed courses.
additional_metdata = json_meta.get('additional_metadata', {})
normalized_metadata['start_date'] = additional_metdata.get('start_date')
normalized_metadata['end_date'] = additional_metdata.get('end_date')
normalized_metadata['enroll_by_date'] = additional_metdata.get('registration_deadline')
else:
# Else case covers OCM courses.
advertised_course_run_uuid = json_meta.get('advertised_course_run_uuid')
advertised_course_run = _get_course_run_by_uuid(json_meta, advertised_course_run_uuid)
if advertised_course_run is not None:
normalized_metadata['start_date'] = advertised_course_run.get('start')
normalized_metadata['end_date'] = advertised_course_run.get('end')
all_seats = advertised_course_run.get('seats', [])
seat = _find_best_mode_seat(all_seats)
if seat:
normalized_metadata['enroll_by_date'] = seat.get('upgrade_deadline')
else:
logger.info(f"No Seat Found for course run '{advertised_course_run.get('key')}'. Seats: {all_seats}")

# Add normalized values to net-new keys:
json_meta['normalized_metadata'] = normalized_metadata


def _update_full_content_metadata_course(content_keys):
"""
Given content_keys, finds the associated ContentMetadata records with a type of course and looks up the full
Expand Down Expand Up @@ -296,8 +369,12 @@ def _update_full_content_metadata_course(content_keys):
if not metadata_record:
logger.error('Could not find ContentMetadata record for content_key %s.', content_key)
continue
# exec ed updates the start/end dates in additional metadata, so we have to manually
# move that over to our variables that we use

# Merge the full metadata from discovery's /api/v1/courses into the local metadata object.
metadata_record.json_metadata.update(course_metadata_dict)

# Exec ed provides the start/end dates in additional_metadata, so we should copy those over to the keys that
# we use (inside the advertised course run).
Comment on lines +376 to +377
Copy link
Contributor Author

@pwnage101 pwnage101 Aug 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note about this if-block: Prior to this PR, it ran BEFORE merging the full metadata from /api/v1/courses/ into the local metadata object, but now it runs AFTER merging full metadata. The additional_metadata key is not present in the /api/v1/search/all/ response, only in /api/v1/courses/, so the only way this code worked was by running the task twice on a given course.

Old behavior:

  1. Load basic metadata from /api/v1/search/all/.
  2. Attempt to copy start/end from additional_metdata (fails because additional_metdata doesn't exist yet).
  3. Merge full metadata from /api/v1/courses/ (adds additional_metdata).
  4. Load basic metadata from /api/v1/search/all/.
  5. Attempt to copy start/end from additional_metdata (success).

New behavior:

  1. Load basic metadata from /api/v1/search/all/.
  2. Merge full metadata from /api/v1/courses/ (adds additional_metdata).
  3. Attempt to copy start/end from additional_metdata (success).

if metadata_record.is_exec_ed_2u_course:
json_meta = metadata_record.json_metadata
start_date = json_meta.get('additional_metadata', {}).get('start_date')
Expand All @@ -306,13 +383,10 @@ def _update_full_content_metadata_course(content_keys):
for run in json_meta.get('course_runs'):
if run.get('uuid') == course_run_uuid:
run.update({'start': start_date, 'end': end_date})
course_run = _get_course_run_by_uuid(json_meta, course_run_uuid)
if course_run is not None:
course_run_meta = metadata_by_key.get(course_run.get('key'))
if hasattr(course_run_meta, 'json_metadata'):
course_run_meta.json_metadata.update({'start': start_date, 'end': end_date})
modified_content_metadata_records.append(course_run_meta)
Comment on lines -309 to -314
Copy link
Contributor Author

@pwnage101 pwnage101 Aug 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section is being removed because it is a no-op. Course runs are never processed in this task.

https://github.com/openedx/enterprise-catalog/blob/d868759/enterprise_catalog/apps/api/tasks.py#L250-L251

metadata_record.json_metadata.update(course_metadata_dict)

# Perform more steps to normalize and move keys around for more consistency across content types.
_normalize_course_metadata(metadata_record)

modified_content_metadata_records.append(metadata_record)
program_content_keys = create_course_associated_programs(course_metadata_dict['programs'], metadata_record)
_update_full_content_metadata_program(program_content_keys)
Expand Down
Loading
Loading