From 2dd14f6c7e1ab3a3c8238405eb4fd6f7c2f2d7ad Mon Sep 17 00:00:00 2001 From: Sven Marnach Date: Wed, 10 Jul 2024 13:05:07 +0200 Subject: [PATCH] bug-1906959: Allow GCS buckets to be behind a CDN. --- docker-compose.yml | 10 ++++++++++ docker/config/local_dev.env | 1 + docker/images/gcs-cdn/Dockerfile | 2 ++ docker/images/gcs-cdn/default.conf | 8 ++++++++ tecken/download/views.py | 5 +++-- tecken/ext/gcs/storage.py | 11 ++++++++++- tecken/tests/conftest.py | 9 +++++++-- tecken/tests/test_storage_backends.py | 15 +++++++++------ tecken/tests/utils.py | 12 +++++++++++- 9 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 docker/images/gcs-cdn/Dockerfile create mode 100644 docker/images/gcs-cdn/default.conf diff --git a/docker-compose.yml b/docker-compose.yml index 5dfa284a6..68f39a020 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -168,6 +168,16 @@ services: interval: 1s timeout: 3s retries: 5 + depends_on: + - gcs-cdn + + # nginx as a reverse proxy simulating a CDN in front of the GCS emulator. + gcs-cdn: + build: + context: docker/images/gcs-cdn + image: local/tecken_gcs_cdn + ports: + - "${EXPOSE_CDN_PORT:-8002}:8002" # https://hub.docker.com/r/localstack/localstack/ # localstack running a fake AWS S3 diff --git a/docker/config/local_dev.env b/docker/config/local_dev.env index f970a53a3..3a2438934 100644 --- a/docker/config/local_dev.env +++ b/docker/config/local_dev.env @@ -33,6 +33,7 @@ DEBUG=true LOCAL_DEV_ENV=true CLOUD_SERVICE_PROVIDER=GCS UPLOAD_GCS_BUCKET=publicbucket +UPLOAD_GCS_PUBLIC_URL=http://gcs-cdn:8002/publicbucket UPLOAD_S3_BUCKET=publicbucket # Default to the test oidcprovider container for Open ID Connect diff --git a/docker/images/gcs-cdn/Dockerfile b/docker/images/gcs-cdn/Dockerfile new file mode 100644 index 000000000..7ab8af11e --- /dev/null +++ b/docker/images/gcs-cdn/Dockerfile @@ -0,0 +1,2 @@ +FROM nginx:1.27-alpine +COPY default.conf /etc/nginx/conf.d/default.conf diff --git a/docker/images/gcs-cdn/default.conf b/docker/images/gcs-cdn/default.conf new file mode 100644 index 000000000..6bc668a85 --- /dev/null +++ b/docker/images/gcs-cdn/default.conf @@ -0,0 +1,8 @@ +server { + listen 8002; + server_name cdn; + + location / { + proxy_pass http://gcs-emulator:8001; + } +} diff --git a/tecken/download/views.py b/tecken/download/views.py index 97a0ddf07..230ec69cb 100644 --- a/tecken/download/views.py +++ b/tecken/download/views.py @@ -156,13 +156,14 @@ def download_symbol(request, debugfilename, debugid, filename, try_symbols=False ) if metadata: url = metadata.download_url - if "http://localstack:4566" in url and request.get_host() == "localhost:8000": + if request.get_host() == "localhost:8000": # If doing local development, with Docker, you're most likely running # localstack as a fake S3. It runs on its own hostname that is only # available from other Docker containers. But to make it really convenient, # for testing symbol download we'll rewrite the URL to one that is possible # to reach from the host. - url = url.replace("localstack:4566", "localhost:4566") + url = url.replace("http://gcs-cdn:8002/", "http://localhost:8002/") + url = url.replace("http://localstack:4566/", "http://localhost:4566/") response = http.HttpResponseRedirect(url) if request._request_debug: response["Debug-Time"] = elapsed_time diff --git a/tecken/ext/gcs/storage.py b/tecken/ext/gcs/storage.py index ace1a61fe..e7c6b9796 100644 --- a/tecken/ext/gcs/storage.py +++ b/tecken/ext/gcs/storage.py @@ -27,11 +27,16 @@ def __init__( prefix: str, try_symbols: bool = False, endpoint_url: Optional[str] = None, + public_url: Optional[str] = None, ): self.bucket = bucket self.prefix = prefix self.try_symbols = try_symbols self.endpoint_url = endpoint_url + if public_url: + self.public_url = public_url.removesuffix("/") + else: + self.public_url = None self.clients = threading.local() # The Cloud Storage client doesn't support setting global timeouts for all requests, so we # need to pass the timeout for every single request. the default timeout is 60 seconds for @@ -106,8 +111,12 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: original_content_length = int(original_content_length) except ValueError: original_content_length = None + if self.public_url: + download_url = f"{self.public_url}/{quote(gcs_key)}" + else: + download_url = blob.public_url metadata = ObjectMetadata( - download_url=blob.public_url, + download_url=download_url, content_type=blob.content_type, content_length=blob.size, content_encoding=blob.content_encoding, diff --git a/tecken/tests/conftest.py b/tecken/tests/conftest.py index 5c83f5ca5..a31911d85 100644 --- a/tecken/tests/conftest.py +++ b/tecken/tests/conftest.py @@ -151,19 +151,24 @@ def get_storage_backend(bucket_name): """Return a function to create a unique storage backend for the current test.""" def _get_storage_backend( - kind: Literal["gcs", "s3"], try_symbols: bool = False + kind: Literal["gcs", "gcs-cdn", "s3"], try_symbols: bool = False ) -> StorageBackend: prefix = "try/" * try_symbols + "v1" match kind: case "gcs": return GCSStorage(bucket_name, prefix, try_symbols) + case "gcs-cdn": + public_url = f"http://gcs-cdn:8002/{bucket_name}" + return GCSStorage( + bucket_name, prefix, try_symbols, public_url=public_url + ) case "s3": return S3Storage(bucket_name, prefix, try_symbols) return _get_storage_backend -@pytest.fixture(params=["gcs", "s3"]) +@pytest.fixture(params=["gcs", "gcs-cdn", "s3"]) def symbol_storage_no_create(request, get_storage_backend): """Replace the global SymbolStorage instance with a new instance. diff --git a/tecken/tests/test_storage_backends.py b/tecken/tests/test_storage_backends.py index 10a5663e3..956351dcd 100644 --- a/tecken/tests/test_storage_backends.py +++ b/tecken/tests/test_storage_backends.py @@ -12,10 +12,13 @@ from tecken.tests.utils import Upload, UPLOADS +@pytest.mark.parametrize("try_storage", [False, True]) @pytest.mark.parametrize("upload", UPLOADS.values(), ids=UPLOADS.keys()) -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) -def test_upload_and_download(get_storage_backend, storage_kind: str, upload: Upload): - backend = get_storage_backend(storage_kind) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) +def test_upload_and_download( + get_storage_backend, storage_kind: str, upload: Upload, try_storage: bool +): + backend = get_storage_backend(storage_kind, try_storage) backend.clear() assert backend.exists() @@ -35,20 +38,20 @@ def test_upload_and_download(get_storage_backend, storage_kind: str, upload: Upl assert metadata.original_md5_sum == upload.metadata.original_md5_sum -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_non_exsiting_bucket(get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) assert not backend.exists() -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_storageerror_msg(get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) error = StorageError(backend) assert repr(backend) in str(error) -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_s3_download_url(bucket_name: str, get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) backend.clear() diff --git a/tecken/tests/utils.py b/tecken/tests/utils.py index e73e9ab9d..fb18188dd 100644 --- a/tecken/tests/utils.py +++ b/tecken/tests/utils.py @@ -7,6 +7,7 @@ from hashlib import md5 from io import BytesIO from typing import Optional +from urllib.parse import quote from tecken.base.symbolstorage import SymbolStorage from tecken.libstorage import ObjectMetadata, StorageBackend @@ -23,9 +24,18 @@ class Upload: backend: Optional[StorageBackend] = None @property - def key(self): + def key(self) -> str: return SymbolStorage.make_key(self.debug_file, self.debug_id, self.sym_file) + @property + def download_url(self) -> Optional[str]: + if not self.backend: + return None + base_url = getattr(self.backend, "public_url", None) + if not base_url: + base_url = self.backend.url + return f"{base_url}/v1/{quote(self.key)}" + @classmethod def uncompressed( cls, debug_file: str, debug_id: str, sym_file: str, body: bytes