From 526eefa420006cf257c745ae9b89030d0fc88d00 Mon Sep 17 00:00:00 2001 From: Robele Baker Date: Mon, 10 Jun 2024 11:42:06 -0700 Subject: [PATCH] Allow base64-service-account-json key auth Issue: #923 (#1245) * added base64 functionality and basic testing * Change log * fix conftest to allow json * Change method name from camel to snake case * change type hinting to be py3.9 compatible --------- Co-authored-by: Robele Baker <> --- .../unreleased/Features-20240516-125735.yaml | 6 ++ dbt/adapters/bigquery/connections.py | 6 +- dbt/adapters/bigquery/utility.py | 40 ++++++++- tests/conftest.py | 3 + tests/functional/adapter/test_json_keyfile.py | 82 +++++++++++++++++++ 5 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Features-20240516-125735.yaml create mode 100644 tests/functional/adapter/test_json_keyfile.py diff --git a/.changes/unreleased/Features-20240516-125735.yaml b/.changes/unreleased/Features-20240516-125735.yaml new file mode 100644 index 000000000..d84b098b2 --- /dev/null +++ b/.changes/unreleased/Features-20240516-125735.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add support for base 64 encoded json keyfile credentials +time: 2024-05-16T12:57:35.383416-07:00 +custom: + Author: robeleb1 + Issue: "923" diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index f96bc1381..4a3feae48 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -40,9 +40,11 @@ from dbt.adapters.events.types import SQLQuery from dbt_common.events.functions import fire_event from dbt.adapters.bigquery import __version__ as dbt_version +from dbt.adapters.bigquery.utility import is_base64, base64_to_string from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum + logger = AdapterLogger("BigQuery") BQ_QUERY_JOB_SPLIT = "-----Query Job SQL Follows-----" @@ -125,7 +127,7 @@ class BigQueryCredentials(Credentials): job_creation_timeout_seconds: Optional[int] = 
def is_base64(s: Union[str, bytes]) -> bool:
    """
    Check whether the given string or bytes object is valid Base64.

    Decoding is strict (``validate=True``): any character outside the
    standard Base64 alphabet (whitespace included) or incorrect padding makes
    the check fail. Empty input decodes successfully and is therefore
    reported as valid.

    Args:
        s: The string or bytes object to check. A value of any other type
            (for example an already-parsed keyfile ``dict`` or an ``int``)
            is reported as not Base64 instead of raising.

    Returns:
        True if the input is valid Base64, False otherwise.
    """
    if isinstance(s, str):
        # Base64 text is pure ASCII; anything else cannot be valid.
        if not s.isascii():
            return False
        # Convert to bytes for decoding.
        s = s.encode("ascii")

    try:
        base64.b64decode(s, validate=True)
    except (TypeError, binascii.Error):
        # TypeError: the input is not bytes-like (dict, int, None, ...).
        # binascii.Error: characters outside the alphabet or bad padding.
        return False
    return True


def base64_to_string(b: Union[str, bytes]) -> str:
    """
    Decode Base64 data and return the result as text.

    Args:
        b: Base64 encoded data, as an ASCII string or a bytes object.

    Returns:
        The decoded payload interpreted as UTF-8.

    Raises:
        binascii.Error: If the input is incorrectly padded.
        UnicodeDecodeError: If the decoded payload is not valid UTF-8.
    """
    return base64.b64decode(b).decode("utf-8")


def string_to_base64(s: str) -> bytes:
    """
    Encode a string as Base64.

    Args:
        s: The text to encode; it is serialized as UTF-8 before encoding.

    Returns:
        The Base64 encoding of the input, as bytes.
    """
    return base64.b64encode(s.encode("utf-8"))
@pytest.fixture
def example_json_keyfile():
    """Return a placeholder service-account keyfile serialized as JSON text."""
    return json.dumps(
        {
            "type": "service_account",
            "project_id": "",
            "private_key_id": "",
            "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n",
            "client_email": "",
            "client_id": "",
            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
            "token_uri": "https://oauth2.googleapis.com/token",
            "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
            "client_x509_cert_url": "",
        }
    )


@pytest.fixture
def example_json_keyfile_b64(example_json_keyfile):
    """Return the placeholder keyfile Base64-encoded (bytes)."""
    # Derived from the plain fixture so the two can never drift apart.
    return string_to_base64(example_json_keyfile)


# Parametrized so every input is reported individually instead of the test
# stopping at the first failing element of a loop.
@pytest.mark.parametrize(
    "value",
    [
        "SGVsbG8gV29ybGQh",  # "Hello World!"
        "Zm9vYmFy",  # "foobar"
        "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5",  # A long string
        "",  # Empty string
    ],
)
def test_valid_base64_strings(value):
    assert is_base64(value) is True


def test_valid_base64_keyfile_string(example_json_keyfile_b64):
    assert is_base64(example_json_keyfile_b64.decode("utf-8")) is True


@pytest.mark.parametrize(
    "value",
    [
        b"SGVsbG8gV29ybGQh",  # "Hello World!"
        b"Zm9vYmFy",  # "foobar"
        b"QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5",  # A long string
        b"",  # Empty bytes
    ],
)
def test_valid_base64_bytes(value):
    assert is_base64(value) is True


def test_valid_base64_keyfile_bytes(example_json_keyfile_b64):
    assert is_base64(example_json_keyfile_b64) is True


@pytest.mark.parametrize(
    "value",
    [
        "This is not Base64",
        "SGVsbG8gV29ybGQ",  # Incorrect padding
        "Invalid#Base64",
        12345,  # Not a string or bytes
        b"Invalid#Base64",
        "H\xffGVsbG8gV29ybGQh",  # Contains invalid character \xff
    ],
)
def test_invalid_base64(value):
    assert is_base64(value) is False


def test_plain_json_keyfile_is_not_base64(example_json_keyfile):
    # A raw JSON keyfile must not be mistaken for Base64, otherwise it would
    # be decoded instead of being used as-is.
    assert is_base64(example_json_keyfile) is False