bug-1906959: Allow GCS buckets to be behind a CDN.
smarnach committed Jul 16, 2024
1 parent c23101d commit 475a1e8
Showing 12 changed files with 76 additions and 29 deletions.
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -168,6 +168,16 @@ services:
interval: 1s
timeout: 3s
retries: 5
depends_on:
- gcs-cdn

# nginx as a reverse proxy simulating a CDN in front of the GCS emulator.
gcs-cdn:
build:
context: docker/images/gcs-cdn
image: local/tecken_gcs_cdn
ports:
- "${EXPOSE_CDN_PORT:-8002}:8002"

# https://hub.docker.com/r/localstack/localstack/
# localstack running a fake AWS S3
4 changes: 2 additions & 2 deletions docker/config/local_dev.env
@@ -32,8 +32,8 @@ AWS_ENDPOINT_URL=http://localstack:4566/
DEBUG=true
LOCAL_DEV_ENV=true
SYMBOL_URLS=
UPLOAD_DEFAULT_URL=http://gcs-emulator:8001/publicbucket/
UPLOAD_TRY_SYMBOLS_URL=http://gcs-emulator:8001/publicbucket/try/
UPLOAD_DEFAULT_URL=http://gcs-emulator:8001/publicbucket?public_url=http://gcs-cdn:8002/publicbucket
UPLOAD_TRY_SYMBOLS_URL=http://gcs-emulator:8001/publicbucket/try?public_url=http://gcs-cdn:8002/publicbucket/try

# Default to the test oidcprovider container for Open ID Connect
#
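The new upload URLs above use a small convention: the bucket URL may carry a public_url query parameter naming the CDN-facing base URL. A minimal sketch of how such a value splits, using only the standard library and the local dev value above (illustrative, not part of the commit):

from urllib.parse import parse_qs, urlparse, urlunparse

url = "http://gcs-emulator:8001/publicbucket?public_url=http://gcs-cdn:8002/publicbucket"
parsed = urlparse(url)
# The bucket URL with the query stripped, plus the single public_url parameter.
bucket_url = urlunparse(parsed._replace(query=""))
public_url = parse_qs(parsed.query)["public_url"][0]
print(bucket_url)   # http://gcs-emulator:8001/publicbucket
print(public_url)   # http://gcs-cdn:8002/publicbucket
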
2 changes: 2 additions & 0 deletions docker/images/gcs-cdn/Dockerfile
@@ -0,0 +1,2 @@
FROM nginx:1.27-alpine
COPY default.conf /etc/nginx/conf.d/default.conf
8 changes: 8 additions & 0 deletions docker/images/gcs-cdn/default.conf
@@ -0,0 +1,8 @@
server {
listen 8002;
server_name cdn;

location / {
proxy_pass http://gcs-emulator:8001;
}
}
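
Not part of the commit, but a quick way to confirm the proxy wiring from the host once the containers are up. It assumes the default EXPOSE_CDN_PORT of 8002 and the publicbucket name from local_dev.env; the exact status code depends on the GCS emulator:

from urllib.error import HTTPError
from urllib.request import urlopen

try:
    with urlopen("http://localhost:8002/publicbucket/") as response:
        print(response.status)
except HTTPError as exc:
    # Any HTTP status (even an error) means nginx reached the emulator;
    # only a connection error would indicate a wiring problem.
    print(exc.code)
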
5 changes: 3 additions & 2 deletions tecken/download/views.py
@@ -156,13 +156,14 @@ def download_symbol(request, debugfilename, debugid, filename, try_symbols=False
)
if metadata:
url = metadata.download_url
if "http://localstack:4566" in url and request.get_host() == "localhost:8000":
if request.get_host() == "localhost:8000":
# If doing local development with Docker, the storage backends run in
# containers of their own: the GCS emulator (optionally behind the nginx
# CDN) or localstack as a fake S3, on hostnames that are only reachable
# from other Docker containers. To make testing symbol downloads
# convenient, we rewrite the URL to one that is reachable from the host.
url = url.replace("localstack:4566", "localhost:4566")
url = url.replace("http://gcs-cdn:8002/", "http://localhost:8002/")
url = url.replace("http://localstack:4566/", "http://localhost:4566/")
response = http.HttpResponseRedirect(url)
if request._request_debug:
response["Debug-Time"] = elapsed_time
16 changes: 13 additions & 3 deletions tecken/ext/gcs/storage.py
@@ -5,7 +5,7 @@
from io import BufferedReader
import threading
from typing import Optional
from urllib.parse import urlparse
from urllib.parse import quote, urlparse

from django.conf import settings

@@ -23,9 +23,15 @@ class GCSStorage(StorageBackend):

accepted_hostnames = (".googleapis.com", "gcs-emulator", "gcs.example.com")

def __init__(self, url: str, try_symbols: bool = False):
def __init__(
self, url: str, try_symbols: bool = False, public_url: Optional[str] = None
):
url = url.removesuffix("/")
self.url = url
if public_url:
self.public_url = public_url.removesuffix("/")
else:
self.public_url = None
parsed_url = urlparse(url)
self.endpoint_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
self.name, _, self.prefix = parsed_url.path[1:].partition("/")
@@ -93,8 +99,12 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]:
original_content_length = int(original_content_length)
except ValueError:
original_content_length = None
if self.public_url:
download_url = f"{self.public_url}/v1/{quote(key)}"
else:
download_url = blob.public_url
metadata = ObjectMetadata(
download_url=blob.public_url,
download_url=download_url,
content_type=blob.content_type,
content_length=blob.size,
content_encoding=blob.content_encoding,
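The effect of public_url on download URLs, restated as a standalone sketch of the branch added to get_object_metadata above (the key is the same example the tests use):

from urllib.parse import quote

def build_download_url(public_url, blob_public_url, key):
    # Mirrors get_object_metadata(): prefer the CDN-facing base URL when one
    # is configured, otherwise fall back to the blob's own public URL.
    if public_url:
        return f"{public_url}/v1/{quote(key)}"
    return blob_public_url

print(build_download_url(
    "http://gcs-cdn:8002/publicbucket",
    None,
    "xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym",
))
# http://gcs-cdn:8002/publicbucket/v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym
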
10 changes: 7 additions & 3 deletions tecken/libstorage.py
@@ -6,7 +6,7 @@
import datetime
from io import BufferedReader
from typing import Optional
from urllib.parse import urlparse
from urllib import parse


@dataclass
@@ -60,10 +60,14 @@ def new(cls, url: str, try_symbols: bool = False) -> "StorageBackend":
# _BACKEND_CLASSES is fully populated before proceeding. The redundant alias pacifies ruff.
from tecken import ext as ext

hostname = urlparse(url).hostname.lower().removesuffix(".")
parsed_url = parse.urlparse(url)
hostname = parsed_url.hostname.lower().removesuffix(".")
query_params = parse.parse_qs(parsed_url.query, max_num_fields=1)
kwargs = {k: v for k, [v] in query_params.items()}
url = parse.urlunparse(parsed_url._replace(query=""))
for backend_class in _BACKEND_CLASSES:
if hostname.endswith(backend_class.accepted_hostnames):
return backend_class(url, try_symbols)
return backend_class(url=url, try_symbols=try_symbols, **kwargs)
raise NoMatchingBackend(url)

def __repr__(self):
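A usage sketch of the updated factory, assuming the local dev URLs. Because parse_qs is called with max_num_fields=1, at most one query parameter (here public_url) is accepted; it is stripped from the stored bucket URL and forwarded to the backend constructor as a keyword argument:

from tecken.libstorage import StorageBackend

backend = StorageBackend.new(
    "http://gcs-emulator:8001/publicbucket?public_url=http://gcs-cdn:8002/publicbucket"
)
assert backend.url == "http://gcs-emulator:8001/publicbucket"
assert backend.public_url == "http://gcs-cdn:8002/publicbucket"
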
15 changes: 9 additions & 6 deletions tecken/tests/conftest.py
@@ -150,21 +150,24 @@ def get_test_storage_url(bucket_name):
"""Return a function to generate unique test storage URLs for the current test."""

def _get_test_storage_url(
kind: Literal["gcs", "s3"], try_symbols: bool = False
kind: Literal["gcs", "gcs-cdn", "s3"], try_symbols: bool = False
) -> str:
path = bucket_name
if try_symbols:
path += "/try"
match kind:
case "gcs":
url = f"http://gcs-emulator:8001/{bucket_name}"
url = f"http://gcs-emulator:8001/{path}"
case "gcs-cdn":
url = f"http://gcs-emulator:8001/{path}?public_url=http://gcs-cdn:8002/{path}"
case "s3":
url = f"http://localstack:4566/{bucket_name}"
if try_symbols:
url += "/try"
url = f"http://localstack:4566/{path}"
return url

return _get_test_storage_url


@pytest.fixture(params=["gcs", "s3"])
@pytest.fixture(params=["gcs", "gcs-cdn", "s3"])
def symbol_storage_no_create(request, settings, get_test_storage_url):
"""Replace the global SymbolStorage instance with a new instance.
6 changes: 2 additions & 4 deletions tecken/tests/test_download.py
@@ -27,10 +27,7 @@ def test_client_happy_path(client, db, symbol_storage, metricsmock):

response = client.get(url)
assert response.status_code == 302
assert (
response.headers["location"]
== f"{upload.backend.url}/v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym"
)
assert response.headers["location"] == upload.download_url
metricsmock.assert_histogram_once(
"tecken.symboldownloader.file_age_days",
value=0,
@@ -65,6 +62,7 @@ def test_client_try_download(client, db, symbol_storage, metricsmock):
)
response = client.get(try_url)
assert response.status_code == 302
assert response.headers["location"] == upload.download_url
# Also note that the headers are the same as for regular downloads
assert response["Access-Control-Allow-Origin"] == "*"
metricsmock.assert_histogram_once(
13 changes: 8 additions & 5 deletions tecken/tests/test_storage_backends.py
@@ -13,10 +13,13 @@
from tecken.tests.utils import Upload, UPLOADS


@pytest.mark.parametrize("try_storage", [False, True])
@pytest.mark.parametrize("upload", UPLOADS.values(), ids=UPLOADS.keys())
@pytest.mark.parametrize("storage_kind", ["gcs", "s3"])
def test_upload_and_download(get_test_storage_url, storage_kind: str, upload: Upload):
url = get_test_storage_url(storage_kind)
@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"])
def test_upload_and_download(
get_test_storage_url, storage_kind: str, upload: Upload, try_storage: bool
):
url = get_test_storage_url(storage_kind, try_storage)
backend = StorageBackend.new(url)
backend.clear()
assert backend.exists()
@@ -37,7 +40,7 @@ def test_upload_and_download(get_test_storage_url, storage_kind: str, upload: Up
assert metadata.original_md5_sum == upload.metadata.original_md5_sum


@pytest.mark.parametrize("storage_kind", ["gcs", "s3"])
@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"])
def test_non_exsiting_bucket(get_test_storage_url, storage_kind: str):
url = get_test_storage_url(storage_kind)
backend = StorageBackend.new(url)
@@ -72,7 +75,7 @@ def test_storage_backend_new(url, kind, name, prefix):
assert isinstance(backend, S3Storage)


@pytest.mark.parametrize("storage_kind", ["gcs", "s3"])
@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"])
def test_storageerror_msg(get_test_storage_url, storage_kind: str):
url = get_test_storage_url(storage_kind)
backend = StorageBackend.new(url)
4 changes: 1 addition & 3 deletions tecken/tests/test_symbolstorage.py
@@ -38,9 +38,7 @@ def test_get_metadata(symbol_storage, lower_case_debug_id):
if lower_case_debug_id:
debug_id = debug_id.lower()
metadata = symbol_storage.get_metadata("xul.pdb", debug_id, "xul.sym")
assert metadata.download_url == (
f"{upload.backend.url}/v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym"
)
assert metadata.download_url == upload.download_url
assert not symbol_storage.get_metadata(
"xxx.pdb", "44E4EC8C2F41492B9369D6B9A059577C2", "xxx.sym"
)
12 changes: 11 additions & 1 deletion tecken/tests/utils.py
@@ -7,6 +7,7 @@
from hashlib import md5
from io import BytesIO
from typing import Optional
from urllib.parse import quote

from tecken.base.symbolstorage import SymbolStorage
from tecken.libstorage import ObjectMetadata, StorageBackend
@@ -23,9 +24,18 @@ class Upload:
backend: Optional[StorageBackend] = None

@property
def key(self):
def key(self) -> str:
return SymbolStorage.make_key(self.debug_file, self.debug_id, self.sym_file)

@property
def download_url(self) -> Optional[str]:
if not self.backend:
return None
base_url = getattr(self.backend, "public_url", None)
if not base_url:
base_url = self.backend.url
return f"{base_url}/v1/{quote(self.key)}"

@classmethod
def uncompressed(
cls, debug_file: str, debug_id: str, sym_file: str, body: bytes
