From 0bcf10e574634372a6b8b3b4b018b331e704d1ae Mon Sep 17 00:00:00 2001 From: Brendon Smith Date: Sun, 25 Sep 2022 17:27:37 -0400 Subject: [PATCH] WIP: Add support for Cloudflare R2 object storage --- .github/workflows/ci.yml | 3 +++ docs/cloud-object-storage.md | 30 ++++++++++++++++----- docs/contributing.md | 9 +++++++ fastenv/cloud/object_storage.py | 36 ++++++++++++++++++------- tests/cloud/test_object_storage.py | 42 +++++++++++++++++++++++++++--- tests/conftest.py | 10 ++++++- 6 files changed, 109 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 69fc3fa..51996f0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,6 +89,9 @@ jobs: BACKBLAZE_B2_BUCKET_HOST: ${{ secrets.BACKBLAZE_B2_BUCKET_HOST }} BACKBLAZE_B2_BUCKET_REGION: ${{ secrets.BACKBLAZE_B2_BUCKET_REGION }} BACKBLAZE_B2_SECRET_KEY_FASTENV: ${{ secrets.BACKBLAZE_B2_SECRET_KEY_FASTENV }} + CLOUDFLARE_R2_ACCESS_KEY_FASTENV: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_FASTENV }} + CLOUDFLARE_R2_BUCKET_HOST: ${{ secrets.CLOUDFLARE_R2_BUCKET_HOST }} + CLOUDFLARE_R2_SECRET_KEY_FASTENV: ${{ secrets.CLOUDFLARE_R2_SECRET_KEY_FASTENV }} - name: Enforce test coverage run: hatch run ${{ env.HATCH_ENV }}:coverage report - name: Build Python package diff --git a/docs/cloud-object-storage.md b/docs/cloud-object-storage.md index 8df47b2..b4f04f3 100644 --- a/docs/cloud-object-storage.md +++ b/docs/cloud-object-storage.md @@ -2,7 +2,7 @@ ## Overview -Dotenv files are commonly kept in [cloud object storage](https://en.wikipedia.org/wiki/Cloud_storage), but environment variable management packages typically don't integrate with object storage clients. Additional logic is therefore required to download the files from object storage prior to loading environment variables. This project offers integration with S3-compatible object storage. 
[AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) and [Backblaze B2](https://www.backblaze.com/b2/docs/) are directly supported and tested. +Dotenv files are commonly kept in [cloud object storage](https://en.wikipedia.org/wiki/Cloud_storage), but environment variable management packages typically don't integrate with object storage clients. Additional logic is therefore required to download the files from object storage prior to loading environment variables. This project offers integration with S3-compatible object storage. [AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html), [Backblaze B2](https://www.backblaze.com/b2/docs/), and [Cloudflare R2](https://developers.cloudflare.com/r2/) are directly supported and tested. !!!note "Why not Boto3?" @@ -329,6 +329,8 @@ Here's an example of how this could be implemented. ### AWS S3 - Pricing + - \$23/TB/month for storage + - \$90/TB/month outbound (also called download or egress), with further complex and expensive egress fees - See the [Backblaze B2 pricing page](https://www.backblaze.com/b2/cloud-storage-pricing.html) for comparisons - See [Backblaze Blog 2021-12-03: Why the world needs lower egress fees](https://www.backblaze.com/blog/why-the-world-needs-lower-egress-fees/) and [Cloudflare Blog 2021-07-23: AWS's egregious egress](https://blog.cloudflare.com/aws-egregious-egress/) for criticisms - Identity and Access Management (IAM): @@ -349,7 +351,7 @@ Here's an example of how this could be implemented. 
### Backblaze B2 - [Pricing](https://www.backblaze.com/b2/cloud-storage-pricing.html): - - Data storage fees are 1/3 the price of S3 + - \$6/TB/month for storage (about 1/4 the price of S3) - Outbound (also called download or egress) data transfer fees are 1/4 the price of S3 - See [Backblaze Blog 2021-12-03: Why the world needs lower egress fees](https://www.backblaze.com/blog/why-the-world-needs-lower-egress-fees/) - [S3-compatible API](https://www.backblaze.com/b2/docs/s3_compatible_api.html)\* @@ -370,10 +372,26 @@ Here's an example of how this could be implemented. ### Cloudflare R2 -_Coming soon!_ - -- [Cloudflare Blog 2021-07-23: AWS's egregious egress](https://blog.cloudflare.com/aws-egregious-egress/) -- [Cloudflare Blog 2021-09-28: Announcing Cloudflare R2 Storage](https://blog.cloudflare.com/introducing-r2-object-storage/) +- [Pricing](https://developers.cloudflare.com/r2/platform/pricing/) + - \$15/TB/month for storage (about two-thirds the price of AWS S3, but over double the price of Backblaze B2) +- [S3-compatible API](https://developers.cloudflare.com/r2/platform/s3-compatibility/api/) +- URIs + - Regions are handled automatically. "When using the S3 API, the region for an R2 bucket is `auto`. For compatibility with tools that do not allow you to specify a region, an empty value and `us-east-1` will alias to the `auto` region." + - The Cloudflare account ID is included in bucket URIs, which is different from other platforms. + - Path style URL: `https://<ACCOUNT_ID>.r2.cloudflarestorage.com/<BUCKET_NAME>` (the Cloudflare docs don't appear to show the position of the bucket name) 
+ - Virtual-hosted-style URL: `https://<BUCKET_NAME>.<ACCOUNT_ID>.r2.cloudflarestorage.com` (added [2022-05-16](https://developers.cloudflare.com/r2/platform/changelog/#2022-05-16)) + - Presigned URLs are supported + - Added [2022-06-17](https://developers.cloudflare.com/r2/platform/changelog/#2022-06-17) + - Note that there may still be CORS limitations for client-side uploads ([cloudflare/cloudflare-docs#4455](https://github.com/cloudflare/cloudflare-docs/issues/4455#issuecomment-1170770935)) +- Identity and Access Management (IAM): + - [Requires generation of a static access key](https://developers.cloudflare.com/r2/data-access/s3-api/tokens/). Does not appear to support temporary credentials from IAM roles (AWS session tokens). Does not appear to support OpenID Connect (OIDC). + - Access keys can be set to either read-only or edit permissions. + - Access keys can be scoped to specific Cloudflare products, Cloudflare accounts, and IP addresses. +- Docs + - [Cloudflare R2 docs](https://developers.cloudflare.com/r2/) + - [Cloudflare Blog 2021-07-23: AWS's egregious egress](https://blog.cloudflare.com/aws-egregious-egress/) + - [Cloudflare Blog 2021-09-28: Announcing Cloudflare R2 Storage](https://blog.cloudflare.com/introducing-r2-object-storage/) + - [Cloudflare Blog 2022-09-21: R2 is now Generally Available](https://blog.cloudflare.com/r2-ga/) ### DigitalOcean Spaces diff --git a/docs/contributing.md b/docs/contributing.md index 66c3c5f..2237cfc 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -170,6 +170,9 @@ BACKBLAZE_B2_ACCESS_KEY_FASTENV="paste-here" BACKBLAZE_B2_SECRET_KEY_FASTENV="paste-here" BACKBLAZE_B2_BUCKET_HOST="paste-here" BACKBLAZE_B2_BUCKET_REGION="paste-here" +CLOUDFLARE_R2_ACCESS_KEY_FASTENV="paste-here" +CLOUDFLARE_R2_SECRET_KEY_FASTENV="paste-here" +CLOUDFLARE_R2_BUCKET_HOST="paste-here" # get AWS account ID from STS (replace fx with jq or other JSON parser as needed) AWS_ACCOUNT_ID=$(aws sts get-caller-identity | fx .Account) @@ -249,6 +252,12 @@ A 
[B2 application key](https://www.backblaze.com/b2/docs/application_keys.html) See the [Backblaze B2 S3-compatible API docs](https://www.backblaze.com/b2/docs/s3_compatible_api.html) for further info. +### GitHub Actions and Cloudflare R2 + +A [Cloudflare S3 auth token](https://developers.cloudflare.com/r2/data-access/s3-api/tokens/) (access key) is stored in GitHub Secrets, along with the corresponding bucket host in "virtual-hosted-style" format (`https://..r2.cloudflarestorage.com`). + +See the [Cloudflare R2 docs](https://developers.cloudflare.com/r2/) for further info. + ## Maintainers ### Merges diff --git a/fastenv/cloud/object_storage.py b/fastenv/cloud/object_storage.py index d557c2b..5161d62 100644 --- a/fastenv/cloud/object_storage.py +++ b/fastenv/cloud/object_storage.py @@ -26,11 +26,12 @@ class ObjectStorageConfig: """Configure S3-compatible object storage. --- - AWS S3 and Backblaze B2 are directly supported and tested. + AWS S3, Backblaze B2, and Cloudflare R2 are directly supported and tested. Buckets can be specified in "virtual-hosted-style", like - `.s3..amazonaws.com` for AWS S3 or - `.s3..backblazeb2.com` for Backblaze B2. + `.s3..amazonaws.com` for AWS S3, + `.s3..backblazeb2.com` for Backblaze B2, or + `..r2.cloudflarestorage.com` for Cloudflare R2. For AWS S3 only, the bucket can be also provided as just ``. If credentials are not provided as arguments, this class will auto-detect @@ -52,7 +53,7 @@ class ObjectStorageConfig: secret_key: str = dataclasses.field(repr=False) bucket_host: str bucket_name: str | None - bucket_region: str + bucket_region: str | None session_token: str | None = dataclasses.field(default=None, repr=False) def __init__( @@ -73,8 +74,10 @@ def __init__( if not bucket_host and not bucket_name: raise AttributeError( "Required bucket info not provided. Please provide a bucket, " - "like `.s3..amazonaws.com` for AWS S3 or " - "`.s3..backblazeb2.com` for Backblaze B2." 
+ "like `.s3..amazonaws.com` for AWS S3, " + "`.s3..backblazeb2.com` for Backblaze B2, " + "or `..r2.cloudflarestorage.com` " + "for Cloudflare R2." ) elif bucket_host and not bucket_name: scheme = ( @@ -90,6 +93,15 @@ def __init__( ".backblazeb2.com" ): self.bucket_name = bucket_host.split(".s3.")[0] + elif ".cloudflarestorage.com" in bucket_host: + if ( + ".cloudflarestorage.com/" in bucket_host + and not bucket_host.endswith(".cloudflarestorage.com/") + ): + self.bucket_name = bucket_host.rsplit(sep="/", maxsplit=1)[1] + else: + self.bucket_name = bucket_host.rsplit(sep=".", maxsplit=4)[0] + bucket_region = "auto" else: self.bucket_name = None else: @@ -111,7 +123,10 @@ def __init__( f"Bucket host {self.bucket_host} does not " f"include bucket name {self.bucket_name}." ) - if self.bucket_region not in self.bucket_host: + if ( + ".cloudflarestorage.com" not in self.bucket_host + and self.bucket_region not in self.bucket_host + ): raise AttributeError( f"Bucket host {self.bucket_host} does not " f"include bucket region {self.bucket_region}." @@ -127,15 +142,16 @@ class ObjectStorageClient: """Instantiate a client to connect to S3-compatible object storage. --- - AWS S3 and Backblaze B2 are directly supported and tested. + AWS S3, Backblaze B2, and Cloudflare R2 are directly supported and tested. This class requires both an HTTPX client and an `ObjectStorageConfig` instance. They will be automatically instantiated if not provided as arguments. Any additional arguments will be used to instantiate `ObjectStorageConfig`. Buckets can be specified in "virtual-hosted-style", like - `.s3..amazonaws.com` for AWS S3 or - `.s3..backblazeb2.com` for Backblaze B2. + `.s3..amazonaws.com` for AWS S3, + `.s3..backblazeb2.com` for Backblaze B2, or + `..r2.cloudflarestorage.com` for Cloudflare R2. For AWS S3 only, the bucket can be also provided as just ``. 
https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html diff --git a/tests/cloud/test_object_storage.py b/tests/cloud/test_object_storage.py index dc50d65..f7fc1c9 100644 --- a/tests/cloud/test_object_storage.py +++ b/tests/cloud/test_object_storage.py @@ -75,6 +75,7 @@ def config_is_correct( config: fastenv.cloud.object_storage.ObjectStorageConfig, expected_bucket_host: str = example_bucket_host, expected_bucket_name: str = example_bucket_name, + expected_bucket_region: str = example_bucket_region, should_have_session_token: bool = False, ) -> bool: """Assert that an `ObjectStorageConfig` instance has the expected attributes.""" @@ -88,7 +89,7 @@ def config_is_correct( assert not config.session_token assert config.bucket_host == expected_bucket_host assert config.bucket_name == expected_bucket_name - assert config.bucket_region == self.example_bucket_region + assert config.bucket_region == expected_bucket_region return True @pytest.mark.parametrize("config_kwargs", example_config_kwargs_for_bucket) @@ -219,8 +220,22 @@ def test_config_if_bucket_name_contains_dots( @pytest.mark.parametrize( "bucket_host,bucket_region", ( - ("mybucket.s3.us-west-001.backblazeb2.com", "us-west-001"), - ("mybucket.nyc3.digitaloceanspaces.com", "nyc3"), + (f"{example_bucket_name}.s3.us-west-001.backblazeb2.com", "us-west-001"), + ( + ( + f"{example_bucket_name}" + ".ab12c3456d7e890fg1h234i5678j9012.r2.cloudflarestorage.com" + ), + None, + ), + ( + ( + "ab12c3456d7e890fg1h234i5678j9012.r2.cloudflarestorage.com/" + f"{example_bucket_name}" + ), + None, + ), + (f"{example_bucket_name}.nyc3.digitaloceanspaces.com", "nyc3"), ), ) @pytest.mark.parametrize("bucket_name", ("", None)) @@ -228,7 +243,7 @@ def test_config_if_not_bucket_name( self, bucket_host: str, bucket_name: str | None, - bucket_region: str, + bucket_region: str | None, mocker: MockerFixture, ) -> None: """Assert that, if a bucket name is not provided, `bucket_name` @@ -314,6 +329,25 @@ def 
test_config_if_scheme_in_bucket_host( assert self.config_is_correct(config, expected_bucket_host=expected_bucket_host) assert scheme not in config.bucket_host + def test_config_if_bucket_region_auto(self, mocker: MockerFixture) -> None: + """Assert that `bucket_region` is set to "auto" for Cloudflare R2.""" + mocker.patch.dict(os.environ, clear=True) + bucket_host = ( + f"{self.example_bucket_name_with_dots}" + ".ab12c3456d7e890fg1h234i5678j9012.r2.cloudflarestorage.com" + ) + config = fastenv.cloud.object_storage.ObjectStorageConfig( + access_key=self.example_access_key, + secret_key=self.example_secret_key, + bucket_host=bucket_host, + ) + assert self.config_is_correct( + config, + expected_bucket_host=bucket_host, + expected_bucket_name=self.example_bucket_name_with_dots, + expected_bucket_region="auto", + ) + class TestObjectStorageClientUnit: """Test `class ObjectStorageClient` and its methods. diff --git a/tests/conftest.py b/tests/conftest.py index f20bcfa..949e6f8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,6 +58,13 @@ def anyio_backend() -> str: bucket_host_variable="BACKBLAZE_B2_BUCKET_HOST", bucket_region_variable="BACKBLAZE_B2_BUCKET_REGION", ) +_cloud_params_cloudflare_static = CloudParams( + access_key_variable="CLOUDFLARE_R2_ACCESS_KEY_FASTENV", + secret_key_variable="CLOUDFLARE_R2_SECRET_KEY_FASTENV", + session_token_variable="", + bucket_host_variable="CLOUDFLARE_R2_BUCKET_HOST", + bucket_region_variable=None, +) @pytest.fixture( @@ -65,6 +72,7 @@ def anyio_backend() -> str: _cloud_params_aws_session, _cloud_params_aws_static, _cloud_params_backblaze_static, + _cloud_params_cloudflare_static, ), scope="session", ) @@ -90,7 +98,7 @@ def object_storage_config( else request_param.session_token_variable ) bucket_host = os.getenv(request_param.bucket_host_variable) - bucket_region = os.getenv(request_param.bucket_region_variable, "us-east-2") + bucket_region = os.getenv(request_param.bucket_region_variable) if not access_key or not 
secret_key or session_token is None: # pragma: no cover pytest.skip("Required cloud credentials not present.") return fastenv.cloud.object_storage.ObjectStorageConfig(