openedx · pwnage101 · Sep 18, 2024 · Sep 19, 2024 · Sep 19, 2024 · Sep 20, 2024
diff --git a/enterprise_catalog/apps/api_client/discovery.py b/enterprise_catalog/apps/api_client/discovery.py
@@ -49,13 +49,14 @@ def _calculate_backoff(self, attempt_count):
         """
         return (self.BACKOFF_FACTOR * (2 ** (attempt_count - 1)))
 
-    def _retrieve_metadata_for_content_filter(self, content_filter, page, request_params):
+    def _retrieve_metadata_page_for_content_filter(self, content_filter, page, request_params):
         """
         Makes a request to discovery's /search/all/ endpoint with the specified
         content_filter, page, and request_params
         """
         LOGGER.info(f'Retrieving results from course-discovery for page {page}...')
         attempts = 0
+        request_params_with_page = request_params | {'page': page}
         while True:
             attempts = attempts + 1
             successful = True
@@ -64,7 +65,7 @@ def _retrieve_metadata_for_content_filter(self, content_filter, page, request_pa
                 response = self.client.post(
                     DISCOVERY_SEARCH_ALL_ENDPOINT,
                     json=content_filter,
-                    params=request_params,
+                    params=request_params_with_page,
                     timeout=self.HTTP_TIMEOUT,
                 )
                 successful = response.status_code < 400
@@ -99,6 +100,34 @@ def _retrieve_metadata_for_content_filter(self, content_filter, page, request_pa
             )
             raise err
 
+    def retrieve_metadata_for_content_filter(self, content_filter, request_params):
+        """
+        """
+        request_params_customized = request_params | {
+            # Increase number of results per page for the course-discovery response
+            'page_size': 100,
+            # Ensure paginated results are consistently ordered by `aggregation_key` and `start`
+            'ordering': 'aggregation_key,start',
+        }
+        page = 1
+        results = []
+        try:
+            response = self._retrieve_metadata_page_for_content_filter(content_filter, page, request_params_customized)
+            results += response.get('results', [])
+            # Traverse all pages and concatenate results
+            while response.get('next'):
+                page += 1
+                response = self._retrieve_metadata_page_for_content_filter(content_filter, page, request_params_customized)
+                results += response.get('results', [])
+        except Exception as exc:
+            LOGGER.exception(
+                'Could not retrieve content items from course-discovery (page %s): %s',
+                page,
+                exc,
+            )
+            raise exc
+        return results
+
     def _retrieve_course_reviews(self, request_params):
         """
         Makes a request to discovery's /api/v1/course_review/ paginated endpoint
@@ -251,7 +280,7 @@ def get_video_skills(self, video_usage_key):
 
         return video_skills
 
-    def get_metadata_by_query(self, catalog_query):
+    def get_metadata_by_query(self, catalog_query, extra_query_params=None):
         """
         Return results from the discovery service's search/all endpoint.
 
@@ -264,30 +293,17 @@ def get_metadata_by_query(self, catalog_query):
         request_params = {
             # Omit non-active course runs from the course-discovery results
             'exclude_expired_course_run': True,
-            # Increase number of results per page for the course-discovery response
-            'page_size': 100,
-            # Ensure paginated results are consistently ordered by `aggregation_key` and `start`
-            'ordering': 'aggregation_key,start',
             # Ensure to fetch learner pathways as part of search/all endpoint response.
             'include_learner_pathways': True,
-        }
-
-        page = 1
+        } | extra_query_params
         results = []
+
         try:
             content_filter = catalog_query.content_filter
-            response = self._retrieve_metadata_for_content_filter(content_filter, page, request_params)
-            results += response.get('results', [])
-            # Traverse all pages and concatenate results
-            while response.get('next'):
-                page += 1
-                request_params.update({'page': page})
-                response = self._retrieve_metadata_for_content_filter(content_filter, page, request_params)
-                results += response.get('results', [])
+            results.extend(self.retrieve_metadata_for_content_filter(content_filter, request_params))
         except Exception as exc:
             LOGGER.exception(
-                'Could not retrieve content items from course-discovery (page %s) for catalog query %s: %s',
-                page,
+                'Could not retrieve content items for catalog query %s: %s',
                 catalog_query,
                 exc,
             )

diff --git a/...apps/catalog/migrations/0040_contentmetadata_catalog_queries_for_restricted_course_run.py b/...apps/catalog/migrations/0040_contentmetadata_catalog_queries_for_restricted_course_run.py
@@ -0,0 +1,26 @@
+# Generated by Django 4.2.16 on 2024-09-19 23:34
+
+import collections
+from django.db import migrations, models
+import jsonfield.encoder
+import jsonfield.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalog', '0039_alter_catalogquery_unique_together_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='contentmetadata',
+            name='catalog_queries_for_restricted_course_run',
+            field=models.ManyToManyField(related_name='restricted_course_runs', to='catalog.catalogquery'),
+        ),
+        migrations.AlterField(
+            model_name='catalogquery',
+            name='content_filter',
+            field=jsonfield.fields.JSONField(default=dict, dump_kwargs={'cls': jsonfield.encoder.JSONEncoder, 'ensure_ascii': False, 'indent': 4, 'separators': (',', ':')}, help_text="Query parameters which will be used to filter the discovery service's search/all endpoint results, specified as a JSON object.", load_kwargs={'object_pairs_hook': collections.OrderedDict}),
+        ),
+    ]
diff --git a/enterprise_catalog/apps/catalog/models.py b/enterprise_catalog/apps/catalog/models.py
@@ -20,7 +20,7 @@
     get_most_recent_modified_time,
     update_query_parameters,
 )
-from enterprise_catalog.apps.api_client.discovery import CatalogQueryMetadata
+from enterprise_catalog.apps.api_client.discovery import CatalogQueryMetadata, DiscoveryApiClient
 from enterprise_catalog.apps.api_client.enterprise_cache import (
     EnterpriseCustomerDetails,
 )
@@ -46,6 +46,7 @@
     get_content_type,
     get_content_uuid,
     get_parent_content_key,
+    is_content_restricted,
     localized_utcnow,
 )
 
@@ -629,6 +630,22 @@ class ContentMetadata(TimeStampedModel):
     )
     catalog_queries = models.ManyToManyField(CatalogQuery)
 
+    # Restricted runs receive their own parallel M2M mapping with CatalogQuery
+    # primarily because we don't want restricted runs to impact any existing
+    # logic as we roll out support.
+    #
+    # The queries we write generally don't include runs directly, so the
+    # `catalog_queries` field above generally doesn't relate course runs
+    # directly to CatalogQuery objects. As a result, downstream code broadly
+    # assumes that a run belongs to a CatalogQuery to which its parent course
+    # also belongs. Relating runs directly to CatalogQueries is a fundamental
+    # departure from the original architecture and base assumptions, hence the
+    # new field just to be on the safe side.
+    catalog_queries_for_restricted_course_run = models.ManyToManyField(
+        CatalogQuery,
+        related_name='restricted_course_runs',
+    )
+
     history = HistoricalRecords()
 
     objects = ContentMetadataManager()
@@ -994,6 +1011,69 @@ def _check_content_association_threshold(catalog_query, metadata_list):
     return False
 
 
+def get_restricted_runs_from_discovery(metadata_including_restricted, catalog_query, dry_run=False):
+    """
+    """
+    # Fast exit if the catalog query's content filter doesn't even specify restricted runs.
+    restricted_runs_allowed = catalog_query.restricted_runs_allowed
+    if not restricted_runs_allowed:
+        return []
+
+    course_keys_matching_query = {
+        get_content_key(metadata) for metadata in metadata_including_restricted
+        if get_content_type(metadata) == COURSE
+    }
+    # Collect all run keys explicitly marked as "allowed" in the content filter, and also a child of a course that
+    # matches the content filter.
+    restricted_run_keys_to_fetch = []
+    for course_key_with_restricted_runs, restricted_run_keys_allowed in restricted_runs_allowed.items():
+        if not course_key_with_restricted_runs in course_keys_matching_query:
+            LOGGER.warning(
+                (
+                    "Course key %s requested by content-filter for CatalogQuery %s to allow restricted runs, but the "
+                    "course did not match the rest of the filter, so its restricted runs will not be included in this "
+                    "CatalogQuery."
+                ),
+                course_key_with_restricted_runs,
+                catalog_query,
+            )
+            continue
+        restricted_run_keys_to_fetch.extend(restricted_run_keys_allowed)
+
+    # Fast exit if none of the courses requested actually match the content filter.
+    if not restricted_run_keys_to_fetch:
+        return []
+
+    # Prepare and make the API call to discovery to actually fetch the restricted runs.
+    content_filter = {
+        'key': restricted_run_keys_to_fetch,
+    }
+    request_params = {
+        # Omit non-active course runs from the course-discovery results
+        'exclude_expired_course_run': True,
+        # Make sure to include restricted runs.
+        'include_restricted': 'custom-b2b-enterprise',
+    }
+    discovery_client = DiscoveryApiClient()
+    return discovery_client.retrieve_metadata_for_content_filter(content_filter, request_params)
+
+
+def associate_restricted_runs_with_query(metadata, catalog_query, dry_run=False):
+    """
+    """
+    metadata_list = create_content_metadata(metadata, catalog_query, dry_run)
+    if dry_run:
+        old_restricted_runs_count = catalog_query.restricted_course_runs.count()
+        new_restricted_runs_count = len(metadata_list)
+        if old_restricted_runs_count != new_restricted_runs_count:
+            LOGGER.info('[Dry Run] Updated restricted_course_runs count ({} -> {}) for {}'.format(
+                old_restricted_runs_count, new_restricted_runs_count, catalog_query))
+    else:
+        catalog_query.restricted_course_runs.set(metadata_list, clear=True)
+    associated_content_keys = [metadata.content_key for metadata in metadata_list]
+    return associated_content_keys
+
+
 def associate_content_metadata_with_query(metadata, catalog_query, dry_run=False):
     """
     Creates or updates a ContentMetadata object for each entry in `metadata`,
@@ -1129,37 +1209,93 @@ def update_contentmetadata_from_discovery(catalog_query, dry_run=False):
     Returns:
         list of str: Returns the content keys that were associated from the query results.
     """
-
+    # Generate a content filter without the `restricted_runs_allowed` key which
+    # discovery doesn't understand anyway.
+    content_filter_clean = {k: v for k, v in catalog_query.content_filter.items() if k != 'restricted_runs_allowed'}
+    discovery_client = DiscoveryApiClient()
     try:
-        # metadata will be an empty dict if unavailable from cache or API.
-        metadata = CatalogQueryMetadata(catalog_query).metadata
+        # Fetch all content metadata matching the content filter, restricted or not.
+        # (metadata will be an empty dict if unavailable from cache or API.)
+        metadata = discovery_client.get_metadata_by_query(
+            content_filter_clean,
+            extra_query_params={"include_restricted": "custom-b2b-enterprise"},
+        )
     except Exception as exc:
         LOGGER.exception(f'update_contentmetadata_from_discovery failed {catalog_query}')
         raise exc
 
-    # associate content metadata with a catalog query only when we get valid results
-    # back from the discovery service. if metadata is `None`, an error occurred while
-    # calling discovery and we should not proceed with the below association logic.
-    if metadata:
-        metadata_content_keys = [get_content_key(entry) for entry in metadata]
-        LOGGER.info(
-            'Retrieved %d content items (%d unique) from course-discovery for catalog query %s',
-            len(metadata_content_keys),
-            len(set(metadata_content_keys)),
-            catalog_query,
-        )
+    # `None` likely means nothing matched the content filter, so exit early.
+    if not metadata:
+        return []
 
-        associated_content_keys = associate_content_metadata_with_query(metadata, catalog_query, dry_run)
-        LOGGER.info(
-            'Associated %d content items (%d unique) with catalog query %s',
-            len(associated_content_keys),
-            len(set(associated_content_keys)),
-            catalog_query,
-        )
+    # Partition metadata dicts by unrestricted vs. restricted.
+    restricted_metadata = []
+    unrestricted_metadata = []
+    for metadata_dict in metadata:
+        if is_content_restricted(metadata_dict):
+            restricted_metadata += metadata_dict
+        else:
+            unrestricted_metadata += metadata_dict
 
-        return associated_content_keys
+    # First pass, associate unrestricted content metadata with this CatalogQuery.
+    unrestricted_metadata_content_keys = [get_content_key(entry) for entry in unrestricted_metadata]
+    LOGGER.info(
+        'Retrieved %d unrestricted content items (%d unique) from course-discovery for catalog query %s',
+        len(unrestricted_metadata_content_keys),
+        len(set(unrestricted_metadata_content_keys)),
+        catalog_query,
+    )
+    associated_content_keys = associate_content_metadata_with_query(metadata, catalog_query, dry_run)
+    LOGGER.info(
+        'Associated %d content items (%d unique) with catalog query %s',
+        len(associated_content_keys),
+        len(set(associated_content_keys)),
+        catalog_query,
+    )
 
-    return []
+    # At this point, we know the `unrestricted_metadata` partition contains
+    # courses and runs that should be related to the CatalogQuery as restricted
+    # content, but it's still only a subset of restricted content that needs to
+    # be related. What's missing are any restricted runs that didn't match the
+    # original content filter, but whose parent (course) key DID match.
+    restricted_metadata_content_keys = [get_content_key(entry) for entry in restricted_metadata]
+    LOGGER.info(
+        'Retrieved %d restricted content items (%d unique) from course-discovery for catalog query %s',
+        len(restricted_metadata_content_keys),
+        len(set(restricted_metadata_content_keys)),
+        catalog_query,
+    )
+    resticted_run_metadata = get_restricted_runs_from_discovery(
+        # Intentionally pass the un-partitioned metadata list so that we can
+        # discover all restricted runs matching any course that matches the
+        # content_filter, restricted-only or not.
+        metadata,
+        catalog_query,
+        dry_run,
+    )
+    associated_restricted_run_keys = associate_restricted_runs_with_query(
+        # Union the following metadata lists:
+        # * `restricted_metadata`:
+        #     Includes any restricted-only courses, or restricted runs directly
+        #     matching the content filter, BUT not necessarily all restricted
+        #     runs beneath matching courses.
+        # * `restricted_run_metadata`:
+        #     Includes all restricted runs part of any course matching the
+        #     content filter, BUT no restricted-only courses.
+        restricted_metadata | resticted_run_metadata,
+        catalog_query,
+        dry_run
+    )
+    LOGGER.info(
+        'Associated %d restricted runs (%d unique) with catalog query %s',
+        len(associated_restricted_run_keys),
+        len(set(associated_restricted_run_keys)),
+        catalog_query,
+    )
+
+    # Consumers expect only the keys associated via the ContentMetadata.catalog_queries M2M field, not including
+    # restricted runs associated via the ContentMetadata.catalog_queries_for_restricted_course_run M2M field.
+    return associated_content_keys
 
 
 class CatalogUpdateCommandConfig(ConfigurationModel):

diff --git a/enterprise_catalog/apps/catalog/utils.py b/enterprise_catalog/apps/catalog/utils.py
@@ -17,7 +17,7 @@
     get_decoded_jwt as get_decoded_jwt_from_cookie
 from pytz import UTC
 
-from enterprise_catalog.apps.catalog.constants import COURSE_RUN
+from enterprise_catalog.apps.catalog.constants import COURSE, COURSE_RUN
 
 
 LOGGER = getLogger(__name__)
@@ -176,3 +176,19 @@ def get_course_run_by_uuid(course, course_run_uuid):
     except IndexError:
         return None
     return course_run
+
+def is_run_restricted(run_metadata_dict):
+    return run_metadata_dict.get('restriction_type') == 'restricted-b2b-enterprise'
+
+def is_content_restricted(metadata_dict):
+    """
+    The given course metadata contains ONLY restricted runs, or the given run is restricted.
+    """
+    content_type = get_content_type(metadata_dict)
+    if content_type == COURSE:
+        run_dicts = metadata_dict.get('course_runs', [])
+        return all(is_run_restricted(run) for run in run_dicts)
+    elif content_type == COURSE_RUN:
+        return is_run_restricted(metadata_dict)
+    # Programs, Learner Pathways, and other content types are never considered "restricted".
+    return False