Skip to content

Commit

Permalink
Add helper to get only the hostname out of MEDIA_URL (#14723)
Browse files Browse the repository at this point in the history
settings.MEDIA_URL (passed in here as media_url) is a custom route on our CDN
that points to a cloud bucket, which we use for uploaded media from the CMS.

Specifically, due to infra constraints, it has to point to a _sub-path_ in the bucket,
not the top/root of it. It also needs to be distinct from the CDN route that points to our
collected static assets (which are at https://<CDN_HOSTNAME>/media/ - that is STATIC_URL).
With all this in mind, MEDIA_URL, when set, points to https://<CDN_HOSTNAME>/media/cms/
When django-storages computes the URL for an object in that CMS bucket, it wants
just the hostname of the bucket, not the full CDN/proxy path to the subdir in the bucket,
because it opinionatedly concatenates it with GS_LOCATION (which is needed to ensure
the files are uploaded to the sub-path mentioned above).

TLDR: We just need the root of the CDN, from MEDIA_URL.
  • Loading branch information
stevejalim authored Jun 24, 2024
1 parent 0c04c6a commit 71a92e9
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
18 changes: 18 additions & 0 deletions bedrock/base/tests/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from django.conf import settings
from django.test import override_settings

import pytest

from bedrock.settings.base import _get_media_cdn_hostname_for_storage_backend


@override_settings(DEV=False, PROD_LANGUAGES=("de", "fr", "nb-NO", "ja", "ja-JP-mac", "en-US", "en-GB"))
def test_lang_groups():
Expand All @@ -15,3 +19,17 @@ def test_lang_groups():
"ja": ["ja-JP-mac", "ja"],
"en": ["en-US", "en-GB"],
}


@pytest.mark.parametrize(
    "media_url, expected_hostname",
    [
        # Absolute CDN URLs: only the scheme and host should survive.
        ("https://www-dev.allizom.org/media/cms/", "https://www-dev.allizom.org"),
        ("https://www-dev.allizom.org/some/future/assets/path/", "https://www-dev.allizom.org"),
        ("https://www.allizom.org/media/cms/", "https://www.allizom.org"),
        ("https://www.mozilla.org/media/cms/", "https://www.mozilla.org"),
        # this one is the default, used in local dev
        ("/custom-media/", "/custom-media/"),
    ],
)
def test_get_media_cdn_hostname(media_url, expected_hostname):
    """Check the helper reduces a MEDIA_URL value to the expected CDN root."""
    computed_hostname = _get_media_cdn_hostname_for_storage_backend(media_url)
    assert computed_hostname == expected_hostname
30 changes: 29 additions & 1 deletion bedrock/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import sys
from os.path import abspath
from pathlib import Path
from urllib.parse import urlparse

from django.conf.locale import LANG_INFO # we patch this in bedrock.base.apps.BaseAppConfig # noqa: F401
from django.utils.functional import lazy
Expand Down Expand Up @@ -601,8 +602,35 @@ def language_url_map_with_fallbacks():
"cache_control": "max-age=2592000, public, immutable" # 2592000 == 30 days / 1 month
}


def _get_media_cdn_hostname_for_storage_backend(media_url):
    """Return the root (scheme + host, plus port if given) of ``media_url``.

    settings.MEDIA_URL (passed in here as media_url) is a custom route on our CDN
    that points to a cloud bucket, which we use for uploaded media from the CMS.

    Specifically, due to infra constraints, it has to point to a _sub-path_ in the bucket,
    not the top/root of it. It also needs to be distinct from the CDN route that points to our
    collected static assets (which are at https://<CDN_HOSTNAME>/media/ - that is STATIC_URL).

    With all this in mind, MEDIA_URL, when set, points to https://<CDN_HOSTNAME>/media/cms/

    When django-storages computes the URL for an object in that CMS bucket, it wants
    just the hostname of the bucket, not the full CDN/proxy path to the subdir in the bucket,
    because it opinionatedly concatenates it with GS_LOCATION (which ensures
    the files are uploaded to the sub-path mentioned above).

    TLDR: We just need the root of the CDN, from MEDIA_URL.

    Args:
        media_url: The MEDIA_URL value; either an absolute http(s) URL or a
            relative path such as "/custom-media/" (the local-dev default).

    Returns:
        "<scheme>://<host>[:<port>]" for absolute http(s) URLs; otherwise
        ``media_url`` unchanged.
    """
    # Require the full scheme prefix: a bare "http" check would also match
    # relative paths like "http-assets/", which have no host to extract.
    if not media_url.startswith(("http://", "https://")):
        return media_url

    media_url_parsed = urlparse(media_url)
    # Take the host from netloc rather than .hostname so an explicit port
    # (e.g. localhost:8000) is kept. Any "user:password@" prefix is dropped and
    # the result is lower-cased, matching what .hostname would have returned.
    host_and_port = media_url_parsed.netloc.rpartition("@")[2].lower()
    return f"{media_url_parsed.scheme}://{host_and_port}"


if GS_BUCKET_NAME and GS_PROJECT_ID:
GS_CUSTOM_ENDPOINT = MEDIA_URL.rstrip("/") # hostname that proxies the storage bucket
GS_CUSTOM_ENDPOINT = _get_media_cdn_hostname_for_storage_backend(MEDIA_URL) # hostname that proxies the storage bucket
GS_FILE_OVERWRITE = False
GS_LOCATION = "media/cms" # path within the bucket to upload to

Expand Down

0 comments on commit 71a92e9

Please sign in to comment.