From d579d6968780858ff214fc1dec5a13afade3b191 Mon Sep 17 00:00:00 2001 From: Sven Marnach Date: Fri, 19 Jul 2024 10:20:16 +0200 Subject: [PATCH] bug-1908868: Use structured configuration for backends. --- bin/setup-services.sh | 10 +- docker/config/local_dev.env | 6 +- tecken/api/views.py | 4 +- tecken/base/admin.py | 22 +--- tecken/base/symbolstorage.py | 59 ++++----- tecken/base/templates/admin/site_status.html | 20 +++ tecken/ext/__init__.py | 3 - tecken/ext/gcs/__init__.py | 2 - tecken/ext/gcs/storage.py | 36 ++++-- tecken/ext/s3/__init__.py | 2 - tecken/ext/s3/storage.py | 104 ++++------------ tecken/libdockerflow.py | 6 +- tecken/libstorage.py | 58 ++------- tecken/settings.py | 123 ++++++++++++------- tecken/tests/conftest.py | 41 +++---- tecken/tests/test_download.py | 20 +-- tecken/tests/test_libdockerflow.py | 18 +-- tecken/tests/test_storage_backends.py | 64 ++++------ tecken/tests/test_symbolstorage.py | 4 +- tecken/tests/test_upload.py | 2 +- tecken/upload/utils.py | 4 +- tecken/upload/views.py | 4 +- 22 files changed, 258 insertions(+), 354 deletions(-) diff --git a/bin/setup-services.sh b/bin/setup-services.sh index 2b986ca45..c8978cd10 100755 --- a/bin/setup-services.sh +++ b/bin/setup-services.sh @@ -7,14 +7,12 @@ set -eo pipefail # Set up S3 -python bin/s3_cli.py delete "${UPLOAD_DEFAULT_URL}" -python bin/s3_cli.py create "${UPLOAD_DEFAULT_URL}" +python bin/s3_cli.py delete "${UPLOAD_S3_BUCKET}" +python bin/s3_cli.py create "${UPLOAD_S3_BUCKET}" # Set up GCS -# FIXME bug 1827506: update argument as needed once GCS is -# implemented in the source code. -python bin/gcs_cli.py delete publicbucket -python bin/gcs_cli.py create publicbucket +python bin/gcs_cli.py delete "${UPLOAD_GCS_BUCKET}" +python bin/gcs_cli.py create "${UPLOAD_GCS_BUCKET}" # Set up db python bin/db.py drop || true diff --git a/docker/config/local_dev.env b/docker/config/local_dev.env index f362623fe..f970a53a3 100644 --- a/docker/config/local_dev.env +++ b/docker/config/local_dev.env @@ -31,9 +31,9 @@ AWS_ENDPOINT_URL=http://localstack:4566/ DEBUG=true LOCAL_DEV_ENV=true -SYMBOL_URLS=http://localstack:4566/publicbucket/ -UPLOAD_DEFAULT_URL=http://localstack:4566/publicbucket/ -UPLOAD_TRY_SYMBOLS_URL=http://localstack:4566/publicbucket/try/ +CLOUD_SERVICE_PROVIDER=GCS +UPLOAD_GCS_BUCKET=publicbucket +UPLOAD_S3_BUCKET=publicbucket # Default to the test oidcprovider container for Open ID Connect # diff --git a/tecken/api/views.py b/tecken/api/views.py index 31df462b5..3d617ad3b 100644 --- a/tecken/api/views.py +++ b/tecken/api/views.py @@ -96,8 +96,8 @@ def possible_upload_urls(request): context = { "urls": [ { - "url": upload_backend.url, - "bucket_name": upload_backend.name, + "bucket_name": upload_backend.bucket, + "prefix": upload_backend.prefix, "default": True, } ] diff --git a/tecken/base/admin.py b/tecken/base/admin.py index 13e25e225..10780b0cb 100644 --- a/tecken/base/admin.py +++ b/tecken/base/admin.py @@ -2,7 +2,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-import json import time from urllib.parse import urlparse, urlunparse @@ -22,6 +21,8 @@ import redis.exceptions +from tecken.base import symbolstorage + ACTION_TO_NAME = {ADDITION: "add", CHANGE: "change", DELETION: "delete"} @@ -117,25 +118,10 @@ def clean_url(value): for key in keys: value = getattr(settings, key) context["settings"].append({"key": key, "value": value}) - - # Now for some oddballs - context["settings"].append( - {"key": "UPLOAD_DEFAULT_URL", "value": clean_url(settings.UPLOAD_DEFAULT_URL)} - ) - context["settings"].append( - { - "key": "UPLOAD_TRY_SYMBOLS_URL", - "value": clean_url(settings.UPLOAD_TRY_SYMBOLS_URL), - } - ) - context["settings"].append( - { - "key": "SYMBOL_URLS", - "value": json.dumps([clean_url(x) for x in settings.SYMBOL_URLS]), - } - ) context["settings"].sort(key=lambda x: x["key"]) + context["backends"] = symbolstorage.SYMBOL_STORAGE.get_download_backends(True) + # Get some table counts tables = [ "auth_user", diff --git a/tecken/base/symbolstorage.py b/tecken/base/symbolstorage.py index 099ec9bd1..c86dda969 100644 --- a/tecken/base/symbolstorage.py +++ b/tecken/base/symbolstorage.py @@ -9,65 +9,54 @@ from django.utils import timezone from tecken.libmarkus import METRICS -from tecken.libstorage import ObjectMetadata, StorageBackend +from tecken.libstorage import ObjectMetadata, StorageBackend, backend_from_config logger = logging.getLogger("tecken") class SymbolStorage: - """Persistent wrapper around multiple StorageBackend instances.""" + """Persistent wrapper around multiple StorageBackend instances. + + :arg upload_backend: The upload and download backend for regular storage. + :arg try_upload_backend: The upload and download backend for try storage. + :arg download_backends: Additional download backends. + """ def __init__( - self, upload_url: str, download_urls: list[str], try_url: Optional[str] = None + self, + upload_backend: StorageBackend, + try_upload_backend: StorageBackend, + download_backends: list[StorageBackend], ): - # The upload backend for regular storage. - self.upload_backend: StorageBackend = StorageBackend.new(upload_url) - - # All additional download backends, except for the regular upload backend. - download_backends = [ - StorageBackend.new(url) for url in download_urls if url != upload_url - ] - - # All backends - self.backends: list[StorageBackend] = [self.upload_backend, *download_backends] - - # The try storage backend for both upload and download, if any. 
- if try_url is None: - self.try_backend: Optional[StorageBackend] = None - else: - self.try_backend: Optional[StorageBackend] = StorageBackend.new( - try_url, try_symbols=True - ) - self.backends.append(self.try_backend) + self.upload_backend = upload_backend + self.try_upload_backend = try_upload_backend + self.backends = [upload_backend, try_upload_backend, *download_backends] @classmethod def from_settings(cls): - return cls( - upload_url=settings.UPLOAD_DEFAULT_URL, - download_urls=settings.SYMBOL_URLS, - try_url=settings.UPLOAD_TRY_SYMBOLS_URL, - ) + upload_backend = backend_from_config(settings.UPLOAD_BACKEND) + try_upload_backend = backend_from_config(settings.TRY_UPLOAD_BACKEND) + download_backends = list(map(backend_from_config, settings.DOWNLOAD_BACKENDS)) + return cls(upload_backend, try_upload_backend, download_backends) def __repr__(self): - urls = [backend.url for backend in self.backends] - return f"<{self.__class__.__name__} urls={urls}>" + backend_reprs = " ".join(map(repr, self.backends)) + return f"<{self.__class__.__name__} backends: {backend_reprs}>" def get_download_backends(self, try_storage: bool) -> list[StorageBackend]: """Return a list of all download backends. Includes the try backend if `try_storage` is set to `True`. """ - return [ - backend - for backend in self.backends - if try_storage or not backend.try_symbols - ] + if try_storage: + return self.backends + return [backend for backend in self.backends if not backend.try_symbols] def get_upload_backend(self, try_storage: bool) -> StorageBackend: """Return either the regular or the try upload backends.""" if try_storage: - return self.try_backend + return self.try_upload_backend return self.upload_backend @staticmethod diff --git a/tecken/base/templates/admin/site_status.html b/tecken/base/templates/admin/site_status.html index a2fff4f8a..a483767c7 100644 --- a/tecken/base/templates/admin/site_status.html +++ b/tecken/base/templates/admin/site_status.html @@ -31,6 +31,26 @@

      <h2>Settings</h2>
 
+      <h2>Backends</h2>
+      <table>
+        <thead>
+          <tr>
+            <th>bucket</th>
+            <th>prefix</th>
+            <th>try_symbols</th>
+          </tr>
+        </thead>
+        <tbody>
+          {% for item in backends %}
+            <tr>
+              <td>{{ item.bucket }}</td>
+              <td>{{ item.prefix }}</td>
+              <td>{{ item.try_symbols }}</td>
+            </tr>
+          {% endfor %}
+        </tbody>
+      </table>
+
      <h2>Table counts</h2>
 
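The admin page's new Backends table above is populated with backend instances built from the structured settings by backend_from_config(), added to tecken/libstorage.py further down in this patch. A minimal sketch of that resolution step (not part of the patch itself), using the GCS upload backend config from tecken/settings.py and the local-dev bucket name:

    # How a structured backend config dict becomes a backend instance.
    # Mirrors backend_from_config() in tecken/libstorage.py; the bucket
    # name is the local-dev "publicbucket".
    from django.utils.module_loading import import_string

    config = {
        "class": "tecken.ext.gcs.storage.GCSStorage",
        "options": {"bucket": "publicbucket", "prefix": "v1", "try_symbols": False},
    }

    backend_class = import_string(config["class"])  # dotted path -> class object
    backend = backend_class(**config["options"])
    print(repr(backend))  # <GCSStorage gs://publicbucket/v1>

The dotted-path indirection keeps tecken/settings.py free of imports from the tecken.ext packages, which is why the re-export shims in the ext __init__ modules below can be deleted.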
diff --git a/tecken/ext/__init__.py b/tecken/ext/__init__.py index 9975538f3..448bb8652 100644 --- a/tecken/ext/__init__.py +++ b/tecken/ext/__init__.py @@ -1,6 +1,3 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. - -from tecken.ext import gcs as gcs -from tecken.ext import s3 as s3 diff --git a/tecken/ext/gcs/__init__.py b/tecken/ext/gcs/__init__.py index a43356b7a..448bb8652 100644 --- a/tecken/ext/gcs/__init__.py +++ b/tecken/ext/gcs/__init__.py @@ -1,5 +1,3 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. - -from tecken.ext.gcs import storage as storage diff --git a/tecken/ext/gcs/storage.py b/tecken/ext/gcs/storage.py index d53497586..ace1a61fe 100644 --- a/tecken/ext/gcs/storage.py +++ b/tecken/ext/gcs/storage.py @@ -5,7 +5,7 @@ from io import BufferedReader import threading from typing import Optional -from urllib.parse import urlparse +from urllib.parse import quote from django.conf import settings @@ -21,16 +21,17 @@ class GCSStorage(StorageBackend): An implementation of the StorageBackend interface for Google Cloud Storage. """ - accepted_hostnames = (".googleapis.com", "gcs-emulator", "gcs.example.com") - - def __init__(self, url: str, try_symbols: bool = False): - url = url.removesuffix("/") - self.url = url - parsed_url = urlparse(url) - self.endpoint_url = f"{parsed_url.scheme}://{parsed_url.netloc}" - self.name, _, self.prefix = parsed_url.path[1:].partition("/") - self.prefix = (self.prefix + "/v1").removeprefix("/") + def __init__( + self, + bucket: str, + prefix: str, + try_symbols: bool = False, + endpoint_url: Optional[str] = None, + ): + self.bucket = bucket + self.prefix = prefix self.try_symbols = try_symbols + self.endpoint_url = endpoint_url self.clients = threading.local() # The Cloud Storage client doesn't support setting global timeouts for all requests, so we # need to pass the timeout for every single request. the default timeout is 60 seconds for @@ -38,7 +39,7 @@ def __init__(self, url: str, try_symbols: bool = False): self.timeout = (settings.S3_CONNECT_TIMEOUT, settings.S3_READ_TIMEOUT) def __repr__(self): - return f"<{self.__class__.__name__} url={self.url!r} try_symbols={self.try_symbols}" + return f"<{self.__class__.__name__} gs://{self.bucket}/{self.prefix}>" def _get_client(self) -> storage.Client: """Return a thread-local low-level storage client.""" @@ -52,7 +53,9 @@ def _get_bucket(self) -> storage.Bucket: if not hasattr(self.clients, "bucket"): client = self._get_client() try: - self.clients.bucket = client.get_bucket(self.name, timeout=self.timeout) + self.clients.bucket = client.get_bucket( + self.bucket, timeout=self.timeout + ) except NotFound as exc: raise StorageError(self) from exc return self.clients.bucket @@ -72,6 +75,12 @@ def exists(self) -> bool: raise StorageError(self) from exc return True + def get_download_url(self, key: str) -> str: + """Return the download URL for the given key.""" + endpoint_url = self.endpoint_url or self._get_client().meta.endpoint_url + endpoint_url = endpoint_url.removesuffix("/") + return f"{endpoint_url}/{self.bucket}/{self.prefix}/{quote(key)}" + def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: """Return object metadata for the object with the given key. 
@@ -83,8 +92,9 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: :raises StorageError: an unexpected backend-specific error was raised """ bucket = self._get_bucket() + gcs_key = f"{self.prefix}/{key}" try: - blob = bucket.get_blob(f"{self.prefix}/{key}", timeout=self.timeout) + blob = bucket.get_blob(gcs_key, timeout=self.timeout) if not blob: return None except ClientError as exc: diff --git a/tecken/ext/s3/__init__.py b/tecken/ext/s3/__init__.py index 746a3d867..448bb8652 100644 --- a/tecken/ext/s3/__init__.py +++ b/tecken/ext/s3/__init__.py @@ -1,5 +1,3 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. - -from tecken.ext.s3 import storage as storage diff --git a/tecken/ext/s3/storage.py b/tecken/ext/s3/storage.py index 70b7d8e0a..6bbb0f93f 100644 --- a/tecken/ext/s3/storage.py +++ b/tecken/ext/s3/storage.py @@ -3,102 +3,42 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. from io import BufferedReader -import re import threading from typing import Optional -from urllib.parse import quote, urlparse +from urllib.parse import quote import boto3.session from botocore.config import Config from botocore.exceptions import BotoCoreError, ClientError - from django.conf import settings from tecken.libstorage import ObjectMetadata, StorageBackend, StorageError -ALL_POSSIBLE_S3_REGIONS: tuple[str] = tuple( - boto3.session.Session().get_available_regions("s3") -) - - class S3Storage(StorageBackend): """ An implementation of the StorageBackend interface for Amazon S3. """ - accepted_hostnames = (".amazonaws.com", "localstack", "s3.example.com") - - # A substring match of the domain is used to recognize storage backends. - # For emulated backends, the name should be present in the docker compose - # service name. 
- _URL_FINGERPRINT: list[str] = { - # AWS S3, like bucket-name.s3.amazonaws.com - "s3": ".amazonaws.com", - # Localstack S3 Emulator - "emulated-s3": "localstack", - # S3 test domain - "test-s3": "s3.example.com", - } - - def __init__(self, url: str, try_symbols: bool = False, file_prefix: str = "v1"): - self.url = url - parsed = urlparse(url) - self.scheme = parsed.scheme - self.netloc = parsed.netloc - - # Determine the backend from the netloc (domain plus port) - self.backend = None - for backend, fingerprint in self._URL_FINGERPRINT.items(): - if fingerprint in self.netloc: - self.backend = backend - break - if self.backend is None: - raise ValueError(f"Storage backend not recognized in {url!r}") - - try: - name, prefix = parsed.path[1:].split("/", 1) - if prefix.endswith("/"): - prefix = prefix[:-1] - except ValueError: - prefix = "" - name = parsed.path[1:] - self.name = name - if file_prefix: - if prefix: - prefix += f"/{file_prefix}" - else: - prefix = file_prefix + def __init__( + self, + bucket: str, + prefix: str, + try_symbols: bool = False, + endpoint_url: Optional[str] = None, + region: Optional[str] = None, + anonymous: bool = False, + ): + self.bucket = bucket self.prefix = prefix self.try_symbols = try_symbols - self.endpoint_url = None - self.region = None - if not self.backend == "s3": - # the endpoint_url will be all but the path - self.endpoint_url = f"{parsed.scheme}://{parsed.netloc}" - region = re.findall(r"s3[.-](.*)\.amazonaws\.com", parsed.netloc) - if region: - if region[0] not in ALL_POSSIBLE_S3_REGIONS: - raise ValueError(f"Not valid S3 region {region[0]}") - self.region = region[0] + self.endpoint_url = endpoint_url + self.region = region + # Accept the anonymous flag that the DOWNLOAD_BACKENDS settings pass for + # public buckets (unsigned requests). + self.anonymous = anonymous self.clients = threading.local() - @property - def base_url(self): - """Return base url for objects managed by this storage backend - - For objects in S3, this includes the domain and bucket name.
- """ - return f"{self.scheme}://{self.netloc}/{self.name}" - def __repr__(self): - return ( - f"<{self.__class__.__name__} name={self.name!r} " - + f"endpoint_url={self.endpoint_url!r} region={self.region!r} " - + f"backend={self.backend!r}>" - ) + return f"<{self.__class__.__name__} s3://{self.bucket}/{self.prefix}>" - def get_storage_client(self): + def _get_client(self): """Return a backend-specific client.""" if not hasattr(self.clients, "storage"): options = { @@ -121,10 +61,10 @@ def exists(self) -> bool: :raises StorageError: an unexpected backend-specific error was raised """ - client = self.get_storage_client() + client = self._get_client() try: - client.head_bucket(Bucket=self.name) + client.head_bucket(Bucket=self.bucket) except ClientError as exc: # A generic ClientError can be raised if: # - The bucket doesn't exist (code 404) @@ -149,9 +89,10 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: :raises StorageError: an unexpected backend-specific error was raised """ - client = self.get_storage_client() + client = self._get_client() + s3_key = f"{self.prefix}/{key}" try: - response = client.head_object(Bucket=self.name, Key=f"{self.prefix}/{key}") + response = client.head_object(Bucket=self.bucket, Key=s3_key) except ClientError as exc: if exc.response["Error"]["Code"] == "404": return None @@ -165,8 +106,9 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: original_content_length = int(original_content_length) except ValueError: original_content_length = None + endpoint_url = client.meta.endpoint_url.removesuffix("/") metadata = ObjectMetadata( - download_url=f"{self.base_url}/{self.prefix}/{quote(key)}", + download_url=f"{endpoint_url}/{self.bucket}/{quote(s3_key)}", content_type=response.get("ContentType"), content_length=response["ContentLength"], content_encoding=response.get("ContentEncoding"), @@ -195,7 +137,7 @@ def upload(self, key: str, body: BufferedReader, metadata: ObjectMetadata): if metadata.original_md5_sum: s3_metadata["original_md5_hash"] = metadata.original_md5_sum kwargs = { - "Bucket": self.name, + "Bucket": self.bucket, "Key": f"{self.prefix}/{key}", "Body": body, "Metadata": s3_metadata, @@ -207,7 +149,7 @@ def upload(self, key: str, body: BufferedReader, metadata: ObjectMetadata): if metadata.content_length: kwargs["ContentLength"] = metadata.content_length - client = self.get_storage_client() + client = self._get_client() try: client.put_object(**kwargs) except (ClientError, BotoCoreError) as exc: diff --git a/tecken/libdockerflow.py b/tecken/libdockerflow.py index 5d5d298e8..54484e791 100644 --- a/tecken/libdockerflow.py +++ b/tecken/libdockerflow.py @@ -18,16 +18,14 @@ def check_storage_urls(app_configs, **kwargs): if not backend.exists(): errors.append( checks.Error( - f"Unable to connect to {backend.url} (bucket={backend.name!r}), " - f"because bucket not found", + f"Unable to connect to {backend!r}), because bucket not found", id="tecken.health.E001", ) ) except StorageError as error: errors.append( checks.Error( - f"Unable to connect to {backend.url} (bucket={backend.name!r}), " - f"due to {error.backend_msg}", + f"Unable to connect to {backend!r}), due to {error}", id="tecken.health.E002", ) ) diff --git a/tecken/libstorage.py b/tecken/libstorage.py index f25e49562..dea6cda72 100644 --- a/tecken/libstorage.py +++ b/tecken/libstorage.py @@ -5,8 +5,9 @@ from dataclasses import dataclass import datetime from io import BufferedReader -from typing import Optional -from urllib.parse import urlparse +from 
typing import Any, Optional + +from django.utils.module_loading import import_string @dataclass @@ -29,49 +30,14 @@ class StorageBackend: """Interface for storage backends.""" # The bucket name for this backend - name: str + bucket: str - # Configured backend URL - url: str + # The prefix for object keys in the bucket + prefix: str # Whether the backend handles try symboles try_symbols: bool - # Class attribute that lists the accepted hostnames. - accepted_hostnames: tuple[str, ...] - - def __init_subclass__(cls): - assert hasattr(cls, "accepted_hostnames") - _BACKEND_CLASSES.append(cls) - - @classmethod - def new(cls, url: str, try_symbols: bool = False) -> "StorageBackend": - """Create an instance of a StorageBackend subclass. - - The hostname part of the given URL is checked against the ``acepted_hostnames`` list of all - implementations of ``StorageBackend``. If the URL hostname ends with any of the accepted - hostnames, the corresponding class is instantiated. - - :returns: A new instance of a StorageBackend subclass. - - :raises: NoMatchingBackend id no subclass accepts the URL. - """ - # The modules in the ext package are not explicitly imported anywhere. We need to make sure - # _BACKEND_CLASSES is fully populated before proceeding. The redundant alias pacifies ruff. - from tecken import ext as ext - - hostname = urlparse(url).hostname.lower().removesuffix(".") - for backend_class in _BACKEND_CLASSES: - if hostname.endswith(backend_class.accepted_hostnames): - return backend_class(url, try_symbols) - raise NoMatchingBackend(url) - - def __repr__(self): - return ( - f"<{self.__class__.__name__} name={self.name} url={self.url} " - + f"try_symbols={self.try_symbols}" - ) - def exists(self) -> bool: """Check that this storage exists. @@ -115,12 +81,6 @@ def __init__(self, backend: StorageBackend): super().__init__(f"Error in backend {backend!r}") -class NoMatchingBackend(Exception): - """No backend class accepts the given URL.""" - - def __init__(self, url: str): - super().__init__(f"No backend class accepts URL {url}") - - -# List of all classes implementing StorageBackend -_BACKEND_CLASSES: list[type[StorageBackend]] = [] +def backend_from_config(config: dict[str, Any]) -> StorageBackend: + cls = import_string(config["class"]) + return cls(**config["options"]) diff --git a/tecken/settings.py b/tecken/settings.py index a8c9ab1c7..0f9896ffb 100644 --- a/tecken/settings.py +++ b/tecken/settings.py @@ -12,6 +12,7 @@ import socket import dj_database_url +from django.core.exceptions import ImproperlyConfigured from dockerflow.version import get_version from everett.manager import ConfigManager, ListOf @@ -70,9 +71,6 @@ def dict_parser(val): ("OIDC_OP_USER_ENDPOINT", "http://example.com/"), ("DATABASE_URL", "postgresql://postgres:postgres@db/tecken"), ("REDIS_URL", "redis://redis-cache:6379/0"), - ("SYMBOL_URLS", "https://s3.example.com/bucket"), - ("UPLOAD_DEFAULT_URL", "https://s3.example.com/bucket"), - ("UPLOAD_TRY_SYMBOLS_URL", "https://s3.example.com/bucket/try/"), ] for key, val in fake_values: os.environ[key] = val @@ -433,12 +431,6 @@ def filter(self, record): }, } -CLOUD_SERVICE_PROVIDER = _config( - "CLOUD_SERVICE_PROVIDER", - default="AWS", - doc="The cloud service provider Tecken is using. 
Either AWS or GCP.", -) - S3_CONNECT_TIMEOUT = _config( "S3_LOOKUP_CONNECT_TIMEOUT", default="5", @@ -573,42 +565,85 @@ ] -SYMBOL_URLS = _config( - "SYMBOL_URLS", - parser=ListOf(str), - doc=( - "Comma-separated list of urls for symbol downloads.\n\n" - "Lookups are performed in list order." - ), -) - -UPLOAD_DEFAULT_URL = _config( - "UPLOAD_DEFAULT_URL", - doc=( - "The default url to use for symbol uploads. This must be an item in " - "SYMBOL_URLS." - ), -) - -UPLOAD_TRY_SYMBOLS_URL = _config( - "UPLOAD_TRY_SYMBOLS_URL", - doc=( - "When an upload comes in with symbols from a Try build, these symbols " - "mustn't be uploaded with the regular symbols.\n\n" - "You could set this to UPLOAD_DEFAULT_URL with a '/try' prefix.\n\n" - "For example::\n\n" - " UPLOAD_DEFAULT_URL=http://s3.example.com/publicbucket/\n" - " UPLOAD_TRY_SYMBOLS_URL=http://s3.example.com/publicbucket/try/" - ), -) +CLOUD_SERVICE_PROVIDER = _config( + "CLOUD_SERVICE_PROVIDER", + default="AWS", + doc="The cloud service provider Tecken is using. Either AWS or GCS.", +).upper() -# If UPLOAD_DEFAULT_URL is not in SYMBOL_URLS it's just too weird. It means we'd upload -# to a S3 bucket we'd never read from and thus it'd be impossible to know the upload -# worked. -if UPLOAD_DEFAULT_URL not in SYMBOL_URLS: - raise ValueError( - f"The UPLOAD_DEFAULT_URL ({UPLOAD_DEFAULT_URL!r}) has to be one of the URLs " - f"in SYMBOL_URLS ({SYMBOL_URLS!r})" +if CLOUD_SERVICE_PROVIDER == "GCS": + UPLOAD_GCS_BUCKET = _config( + "UPLOAD_GCS_BUCKET", + doc="The GCS bucket name for uploads and downloads.", + ) + DOWNLOAD_S3_BUCKET = _config( + "DOWNLOAD_S3_BUCKET", + raise_error=False, + doc="The S3 bucket name for downloads.", + ) + UPLOAD_BACKEND = { + "class": "tecken.ext.gcs.storage.GCSStorage", + "options": { + "bucket": UPLOAD_GCS_BUCKET, + "prefix": "v1", + "try_symbols": False, + }, + } + TRY_UPLOAD_BACKEND = { + "class": "tecken.ext.gcs.storage.GCSStorage", + "options": { + "bucket": UPLOAD_GCS_BUCKET, + "prefix": "try/v1", + "try_symbols": True, + }, + } + DOWNLOAD_BACKENDS = [] + if DOWNLOAD_S3_BUCKET: + DOWNLOAD_BACKENDS = [ + { + "class": "tecken.ext.s3.storage.S3Storage", + "options": { + "bucket": DOWNLOAD_S3_BUCKET, + "prefix": "v1", + "try_symbols": False, + "anonymous": True, + }, + }, + { + "class": "tecken.ext.s3.storage.S3Storage", + "options": { + "bucket": DOWNLOAD_S3_BUCKET, + "prefix": "try/v1", + "try_symbols": True, + "anonymous": True, + }, + }, + ] +elif CLOUD_SERVICE_PROVIDER == "AWS": + UPLOAD_S3_BUCKET = _config( + "UPLOAD_S3_BUCKET", + doc="The S3 bucket name for uploads and downloads.", + ) + UPLOAD_BACKEND = { + "class": "tecken.ext.s3.storage.S3Storage", + "options": { + "bucket": UPLOAD_S3_BUCKET, + "prefix": "v1", + "try_symbols": False, + }, + } + TRY_UPLOAD_BACKEND = { + "class": "tecken.ext.s3.storage.S3Storage", + "options": { + "bucket": UPLOAD_S3_BUCKET, + "prefix": "try/v1", + "try_symbols": True, + }, + } + DOWNLOAD_BACKENDS = [] +else: + raise ImproperlyConfigured( + f"invalid value for CLOUD_SERVICE_PROVIDER: {CLOUD_SERVICE_PROVIDER}" ) COMPRESS_EXTENSIONS = _config( diff --git a/tecken/tests/conftest.py b/tecken/tests/conftest.py index 373a1dd18..5c83f5ca5 100644 --- a/tecken/tests/conftest.py +++ b/tecken/tests/conftest.py @@ -18,6 +18,7 @@ from tecken.base.symbolstorage import SymbolStorage from tecken.ext.gcs.storage import GCSStorage from tecken.ext.s3.storage import S3Storage +from tecken.libstorage import StorageBackend @pytest.fixture(autouse=True) @@ -106,11 +107,11 @@
def clear_s3_storage(self: S3Storage): """Make sure the S3 bucket exists and delete all files under the prefix.""" # NOTE(smarnach): This gets patched into S3Storage as a method. I don't want this to exist in # production code, since it should never get called there. - client = self.get_storage_client() - client.create_bucket(Bucket=self.name) - response = client.list_objects_v2(Bucket=self.name, Prefix=self.prefix) + client = self._get_client() + client.create_bucket(Bucket=self.bucket) + response = client.list_objects_v2(Bucket=self.bucket, Prefix=self.prefix) for object in response.get("Contents", []): - client.delete_object(Bucket=self.name, Key=object["Key"]) + client.delete_object(Bucket=self.bucket, Key=object["Key"]) S3Storage.clear = clear_s3_storage @@ -122,7 +123,7 @@ def clear_gcs_storage(self: GCSStorage): # production code, since it should never get called there. client = self._get_client() try: - client.create_bucket(self.name) + client.create_bucket(self.bucket) except Conflict: # Bucket already exists. pass @@ -146,37 +147,31 @@ def bucket_name(request): @pytest.fixture -def get_test_storage_url(bucket_name): - """Return a function to generate unique test storage URLs for the current test.""" +def get_storage_backend(bucket_name): + """Return a function to create a unique storage backend for the current test.""" - def _get_test_storage_url( + def _get_storage_backend( kind: Literal["gcs", "s3"], try_symbols: bool = False - ) -> str: + ) -> StorageBackend: + prefix = "try/" * try_symbols + "v1" match kind: case "gcs": - url = f"http://gcs-emulator:8001/{bucket_name}" + return GCSStorage(bucket_name, prefix, try_symbols) case "s3": - url = f"http://localstack:4566/{bucket_name}" - if try_symbols: - url += "/try" - return url + return S3Storage(bucket_name, prefix, try_symbols) - return _get_test_storage_url + return _get_storage_backend @pytest.fixture(params=["gcs", "s3"]) -def symbol_storage_no_create(request, settings, get_test_storage_url): +def symbol_storage_no_create(request, get_storage_backend): """Replace the global SymbolStorage instance with a new instance. This fixture does not create and clean the storage buckets. 
""" - - settings.UPLOAD_DEFAULT_URL = get_test_storage_url(request.param) - settings.SYMBOL_URLS = [] - settings.UPLOAD_TRY_SYMBOLS_URL = get_test_storage_url( - request.param, try_symbols=True - ) - symbol_storage = SymbolStorage.from_settings() + upload_backend = get_storage_backend(request.param) + try_upload_backend = get_storage_backend(request.param, try_symbols=True) + symbol_storage = SymbolStorage(upload_backend, try_upload_backend, []) with mock.patch("tecken.base.symbolstorage.SYMBOL_STORAGE", symbol_storage): yield symbol_storage diff --git a/tecken/tests/test_download.py b/tecken/tests/test_download.py index c0dde6d70..14fc87e0a 100644 --- a/tecken/tests/test_download.py +++ b/tecken/tests/test_download.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse from django.urls import reverse +import requests from tecken.download import views from tecken.tests.utils import UPLOADS @@ -27,16 +28,16 @@ def test_client_happy_path(client, db, symbol_storage, metricsmock): response = client.get(url) assert response.status_code == 302 - assert ( - response.headers["location"] - == f"{upload.backend.url}/v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym" - ) metricsmock.assert_histogram_once( "tecken.symboldownloader.file_age_days", value=0, tags=["storage:regular", "host:testnode"], ) + response = requests.get(response.headers["location"]) + assert response.status_code == 200 + assert response.content == upload.original_body + response = client.head(url) assert response.status_code == 200 assert response["Access-Control-Allow-Origin"] == "*" @@ -44,10 +45,9 @@ def test_client_happy_path(client, db, symbol_storage, metricsmock): def test_client_try_download(client, db, symbol_storage, metricsmock): - """Suppose there's a file that doesn't exist in any of the - settings.SYMBOL_URLS but does exist in settings.UPLOAD_TRY_SYMBOLS_URL, - then to reach that file you need to use ?try on the URL. - + """ + Suppose there's a file that doesn't exist in any of the regular storage backends but does + exist in try storage, then to reach that file you need to use ?try on the URL. """ upload = UPLOADS["compressed"] upload.upload(symbol_storage, try_storage=True) @@ -73,6 +73,10 @@ def test_client_try_download(client, db, symbol_storage, metricsmock): tags=["storage:try", "host:testnode"], ) + response = requests.get(response.headers["location"]) + assert response.status_code == 200 + assert response.content == upload.original_body + # And like regular download, you're only allowed to use GET or HEAD response = client.put(try_url) assert response.status_code == 405 diff --git a/tecken/tests/test_libdockerflow.py b/tecken/tests/test_libdockerflow.py index 7e99f0b5a..a239ad97f 100644 --- a/tecken/tests/test_libdockerflow.py +++ b/tecken/tests/test_libdockerflow.py @@ -6,7 +6,7 @@ import pytest from tecken import libdockerflow -from tecken.base.symbolstorage import symbol_storage, SymbolStorage +from tecken.base.symbolstorage import symbol_storage from tecken.libdockerflow import get_version_info, get_release_name @@ -15,18 +15,10 @@ def test_check_storage_urls_happy_path(symbol_storage): assert not errors -def test_check_storage_urls_missing(get_test_storage_url): - # NOTE(smarnach): We don't use the symbol_storage fixture here since it creates the backend - # buckets, and we want the buckets to be missing in this test. 
- symbol_storage = SymbolStorage( - upload_url=get_test_storage_url("gcs"), - download_urls=[get_test_storage_url("s3")], - ) - with patch("tecken.base.symbolstorage.SYMBOL_STORAGE", symbol_storage): - errors = libdockerflow.check_storage_urls(None) - assert len(errors) == 2 - assert symbol_storage.backends[0].name in errors[0].msg - assert symbol_storage.backends[1].name in errors[1].msg +def test_check_storage_urls_missing(symbol_storage_no_create): + errors = libdockerflow.check_storage_urls(None) + assert len(errors) == 1 + assert symbol_storage_no_create.get_upload_backend(False).bucket in errors[0].msg for error in errors: assert "bucket not found" in error.msg assert error.id == "tecken.health.E001" diff --git a/tecken/tests/test_storage_backends.py b/tecken/tests/test_storage_backends.py index e0927e79d..10a5663e3 100644 --- a/tecken/tests/test_storage_backends.py +++ b/tecken/tests/test_storage_backends.py @@ -3,21 +3,19 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. from datetime import datetime +from urllib.parse import urlparse import pytest import requests -from tecken.ext.gcs.storage import GCSStorage -from tecken.ext.s3.storage import S3Storage -from tecken.libstorage import NoMatchingBackend, StorageBackend, StorageError +from tecken.libstorage import StorageError from tecken.tests.utils import Upload, UPLOADS @pytest.mark.parametrize("upload", UPLOADS.values(), ids=UPLOADS.keys()) @pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) -def test_upload_and_download(get_test_storage_url, storage_kind: str, upload: Upload): - url = get_test_storage_url(storage_kind) - backend = StorageBackend.new(url) +def test_upload_and_download(get_storage_backend, storage_kind: str, upload: Upload): + backend = get_storage_backend(storage_kind) backend.clear() assert backend.exists() @@ -38,43 +36,29 @@ def test_upload_and_download(get_test_storage_url, storage_kind: str, upload: Up @pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) -def test_non_exsiting_bucket(get_test_storage_url, storage_kind: str): - url = get_test_storage_url(storage_kind) - backend = StorageBackend.new(url) +def test_non_exsiting_bucket(get_storage_backend, storage_kind: str): + backend = get_storage_backend(storage_kind) assert not backend.exists() -def test_unknown_hostname(): - with pytest.raises(NoMatchingBackend): - StorageBackend.new("https://mozilla.org/test-bucket") - - -@pytest.mark.parametrize( - "url,kind,name,prefix", - [ - ("https://s3.amazonaws.com/some-bucket", "s3", "some-bucket", "v1"), - ("https://s3-eu-west-2.amazonaws.com/other-bucket", "s3", "other-bucket", "v1"), - ("http://s3.example.com/buck/prfx", "s3", "buck", "prfx/v1"), - ("http://localstack:4566/publicbucket/try", "s3", "publicbucket", "try/v1"), - ("https://storage.googleapis.com/some-bucket", "gcs", "some-bucket", "v1"), - ("http://gcs.example.com/buck/prfx", "gcs", "buck", "prfx/v1"), - ("http://gcs-emulator:8001/publicbucket/try", "gcs", "publicbucket", "try/v1"), - ], -) -def test_storage_backend_new(url, kind, name, prefix): - backend = StorageBackend.new(url) - assert backend.name == name - assert backend.prefix == prefix - match kind: - case "gcs": - assert isinstance(backend, GCSStorage) - case "s3": - assert isinstance(backend, S3Storage) - - @pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) -def test_storageerror_msg(get_test_storage_url, storage_kind: str): - url = get_test_storage_url(storage_kind) - backend = StorageBackend.new(url) +def test_storageerror_msg(get_storage_backend, storage_kind: 
str): + backend = get_storage_backend(storage_kind) error = StorageError(backend) assert repr(backend) in str(error) + + +@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +def test_s3_download_url(bucket_name: str, get_storage_backend, storage_kind: str): + backend = get_storage_backend(storage_kind) + backend.clear() + upload = UPLOADS["special_chars"] + upload.upload_to_backend(backend) + metadata = backend.get_object_metadata(upload.key) + parsed_url = urlparse(metadata.download_url) + bucket, _, key = parsed_url.path[1:].partition("/") + assert bucket == bucket_name + assert ( + key + == "v1/libc%2B%2Babi.dylib/43940F08B65E38888CD3C52398EB1CA10/libc%2B%2Babi.dylib.sym" + ) diff --git a/tecken/tests/test_symbolstorage.py b/tecken/tests/test_symbolstorage.py index e478eb523..726724608 100644 --- a/tecken/tests/test_symbolstorage.py +++ b/tecken/tests/test_symbolstorage.py @@ -38,9 +38,7 @@ def test_get_metadata(symbol_storage, lower_case_debug_id): if lower_case_debug_id: debug_id = debug_id.lower() metadata = symbol_storage.get_metadata("xul.pdb", debug_id, "xul.sym") - assert metadata.download_url == ( - f"{upload.backend.url}/v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym" - ) + assert metadata.download_url assert not symbol_storage.get_metadata( "xxx.pdb", "44E4EC8C2F41492B9369D6B9A059577C2", "xxx.sym" ) diff --git a/tecken/tests/test_upload.py b/tecken/tests/test_upload.py index 07dc90e07..f7f5b91eb 100644 --- a/tecken/tests/test_upload.py +++ b/tecken/tests/test_upload.py @@ -139,7 +139,7 @@ def test_upload_try_symbols_happy_path( token.permissions.add(permission) # Upload one of the files so that when the upload happens, it's an update. - symbol_storage.try_backend.upload( + symbol_storage.try_upload_backend.upload( key="flag/deadbeef/flag.jpeg", body=BytesIO(b"abc123"), metadata=ObjectMetadata(), diff --git a/tecken/upload/utils.py b/tecken/upload/utils.py index cf0d5128e..db80f2653 100644 --- a/tecken/upload/utils.py +++ b/tecken/upload/utils.py @@ -251,7 +251,7 @@ def upload_file_upload( file_upload = FileUpload.objects.create( upload=upload, - bucket_name=backend.name, + bucket_name=backend.bucket, key=key_name, update=bool(existing_metadata), compressed=compressed, @@ -265,7 +265,7 @@ def upload_file_upload( ) metadata.content_length = size - logger.debug(f"Uploading file {key_name!r} into {backend.name!r}") + logger.debug(f"Uploading file {key_name!r} into {backend.bucket!r}") with METRICS.timer("upload_put_object"): with open(file_path, "rb") as f: backend.upload(key_name, f, metadata) diff --git a/tecken/upload/views.py b/tecken/upload/views.py index 6a8b08378..243cd79de 100644 --- a/tecken/upload/views.py +++ b/tecken/upload/views.py @@ -267,7 +267,7 @@ def upload_archive(request, upload_workspace): upload_obj = Upload.objects.create( user=request.user, filename=name, - bucket_name=backend.name, + bucket_name=backend.bucket, size=size, download_url=url, redirect_urls=redirect_urls, @@ -323,7 +323,7 @@ def upload_archive(request, upload_workspace): ) METRICS.incr( - "upload_uploads", tags=[f"try:{is_try_upload}", f"bucket:{backend.name}"] + "upload_uploads", tags=[f"try:{is_try_upload}", f"bucket:{backend.bucket}"] ) return http.JsonResponse({"upload": _serialize_upload(upload_obj)}, status=201)
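
Taken together, SymbolStorage.from_settings() now builds the whole backend graph from these structured settings instead of parsing URLs. A sketch of the wiring produced under CLOUD_SERVICE_PROVIDER=GCS with a DOWNLOAD_S3_BUCKET configured — the bucket names here are hypothetical:

    # Object graph equivalent to SymbolStorage.from_settings() with
    # CLOUD_SERVICE_PROVIDER=GCS, UPLOAD_GCS_BUCKET=publicbucket and
    # DOWNLOAD_S3_BUCKET=mirror-bucket (hypothetical names).
    from tecken.base.symbolstorage import SymbolStorage
    from tecken.ext.gcs.storage import GCSStorage
    from tecken.ext.s3.storage import S3Storage

    storage = SymbolStorage(
        upload_backend=GCSStorage(bucket="publicbucket", prefix="v1"),
        try_upload_backend=GCSStorage(
            bucket="publicbucket", prefix="try/v1", try_symbols=True
        ),
        download_backends=[
            S3Storage(bucket="mirror-bucket", prefix="v1"),
            S3Storage(bucket="mirror-bucket", prefix="try/v1", try_symbols=True),
        ],
    )

    # Regular downloads consult the non-try backends in order; ?try requests
    # consult all four backends, try storage included.
    storage.get_download_backends(try_storage=False)
    # -> [<GCSStorage gs://publicbucket/v1>, <S3Storage s3://mirror-bucket/v1>]
    storage.get_download_backends(try_storage=True)
    # -> all four backends

Under CLOUD_SERVICE_PROVIDER=AWS the same graph degenerates to the two S3 upload backends and an empty download list, so the upload backends also serve all downloads.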