From 4886277947690b10ba7907b87885659b0486fe8f Mon Sep 17 00:00:00 2001 From: Robele Baker Date: Thu, 16 May 2024 12:05:27 -0700 Subject: [PATCH 1/5] added base64 functionality and basic testing --- dbt/adapters/bigquery/connections.py | 6 +- dbt/adapters/bigquery/utility.py | 38 +++++++++ tests/functional/adapter/test_json_keyfile.py | 82 +++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 tests/functional/adapter/test_json_keyfile.py diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index f96bc1381..6397fe92e 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -40,9 +40,11 @@ from dbt.adapters.events.types import SQLQuery from dbt_common.events.functions import fire_event from dbt.adapters.bigquery import __version__ as dbt_version +from dbt.adapters.bigquery.utility import is_base64, base64ToString from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum + logger = AdapterLogger("BigQuery") BQ_QUERY_JOB_SPLIT = "-----Query Job SQL Follows-----" @@ -125,7 +127,7 @@ class BigQueryCredentials(Credentials): job_creation_timeout_seconds: Optional[int] = None job_execution_timeout_seconds: Optional[int] = None - # Keyfile json creds + # Keyfile json creds (unicode or base 64 encoded) keyfile: Optional[str] = None keyfile_json: Optional[Dict[str, Any]] = None @@ -332,6 +334,8 @@ def get_google_credentials(cls, profile_credentials) -> GoogleCredentials: elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT_JSON: details = profile_credentials.keyfile_json + if is_base64(profile_credentials.keyfile_json): + details = base64ToString(details) return creds.from_service_account_info(details, scopes=profile_credentials.scopes) elif method == BigQueryConnectionMethod.OAUTH_SECRETS: diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py index 5914280a3..44d5fbf76 100644 --- a/dbt/adapters/bigquery/utility.py +++ b/dbt/adapters/bigquery/utility.py @@ -1,3 +1,5 @@ +import base64 +import binascii import json from typing import Any, Optional @@ -43,3 +45,39 @@ def sql_escape(string): if not isinstance(string, str): raise dbt_common.exceptions.CompilationError(f"cannot escape a non-string: {string}") return json.dumps(string)[1:-1] + + +def is_base64(s: str | bytes) -> bool: + """ + Checks if the given string or bytes object is valid Base64 encoded. + + Args: + s: The string or bytes object to check. + + Returns: + True if the input is valid Base64, False otherwise. + """ + + if isinstance(s, str): + # For strings, ensure they consist only of valid Base64 characters + if not s.isascii(): + return False + # Convert to bytes for decoding + s = s.encode("ascii") + + try: + # Use the 'validate' parameter to enforce strict Base64 decoding rules + base64.b64decode(s, validate=True) + return True + except TypeError: + return False + except binascii.Error: # Catch specific errors from the base64 module + return False + + +def base64ToString(b): + return base64.b64decode(b).decode("utf-8") + + +def stringToBase64(s): + return base64.b64encode(s.encode("utf-8")) diff --git a/tests/functional/adapter/test_json_keyfile.py b/tests/functional/adapter/test_json_keyfile.py new file mode 100644 index 000000000..58b9a107b --- /dev/null +++ b/tests/functional/adapter/test_json_keyfile.py @@ -0,0 +1,82 @@ +import json +import pytest +from dbt.adapters.bigquery.utility import stringToBase64, is_base64 + + +@pytest.fixture +def example_json_keyfile(): + keyfile = json.dumps( + { + "type": "service_account", + "project_id": "", + "private_key_id": "", + "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n", + "client_email": "", + "client_id": "", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "", + } + ) + + return keyfile + + +@pytest.fixture +def example_json_keyfile_b64(): + keyfile = json.dumps( + { + "type": "service_account", + "project_id": "", + "private_key_id": "", + "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n", + "client_email": "", + "client_id": "", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "", + } + ) + + return stringToBase64(keyfile) + + +def test_valid_base64_strings(example_json_keyfile_b64): + valid_strings = [ + "SGVsbG8gV29ybGQh", # "Hello World!" + "Zm9vYmFy", # "foobar" + "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5", # A long string + "", # Empty string + example_json_keyfile_b64.decode("utf-8"), + ] + + for s in valid_strings: + assert is_base64(s) is True + + +def test_valid_base64_bytes(example_json_keyfile_b64): + valid_bytes = [ + b"SGVsbG8gV29ybGQh", # "Hello World!" + b"Zm9vYmFy", # "foobar" + b"QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5", # A long string + b"", # Empty bytes + example_json_keyfile_b64, + ] + for s in valid_bytes: + assert is_base64(s) is True + + +def test_invalid_base64(example_json_keyfile): + invalid_inputs = [ + "This is not Base64", + "SGVsbG8gV29ybGQ", # Incorrect padding + "Invalid#Base64", + 12345, # Not a string or bytes + b"Invalid#Base64", + "H\xffGVsbG8gV29ybGQh", # Contains invalid character \xff + example_json_keyfile, + ] + for s in invalid_inputs: + assert is_base64(s) is False From b49b596965839cddbd687b75126d506e7ea7f3e0 Mon Sep 17 00:00:00 2001 From: Robele Baker Date: Thu, 16 May 2024 12:57:42 -0700 Subject: [PATCH 2/5] Change log --- .changes/unreleased/Features-20240516-125735.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20240516-125735.yaml diff --git a/.changes/unreleased/Features-20240516-125735.yaml b/.changes/unreleased/Features-20240516-125735.yaml new file mode 100644 index 000000000..d84b098b2 --- /dev/null +++ b/.changes/unreleased/Features-20240516-125735.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add support for base 64 encoded json keyfile credentials +time: 2024-05-16T12:57:35.383416-07:00 +custom: + Author: robeleb1 + Issue: "923" From 5c12bc55f2f0cb95b4def6963f9934c9e6cfdd74 Mon Sep 17 00:00:00 2001 From: Robele Baker Date: Thu, 16 May 2024 13:17:54 -0700 Subject: [PATCH 3/5] fix conftest to allow json --- tests/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 78f3d82e1..90ed47387 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import pytest import os import json +from dbt.adapters.bigquery.utility import is_base64, base64ToString # Import the fuctional fixtures as a plugin # Note: fixtures with session scope need to be local @@ -38,6 +39,8 @@ def oauth_target(): def service_account_target(): credentials_json_str = os.getenv("BIGQUERY_TEST_SERVICE_ACCOUNT_JSON").replace("'", "") + if is_base64(credentials_json_str): + credentials_json_str = base64ToString(credentials_json_str) credentials = json.loads(credentials_json_str) project_id = credentials.get("project_id") return { From edb1b39249e20582840b1731452f077971dc75d8 Mon Sep 17 00:00:00 2001 From: Robele Baker <> Date: Sat, 1 Jun 2024 01:43:13 -0700 Subject: [PATCH 4/5] Change method name from camel to snake case --- dbt/adapters/bigquery/connections.py | 4 ++-- dbt/adapters/bigquery/utility.py | 4 ++-- tests/conftest.py | 4 ++-- tests/functional/adapter/test_json_keyfile.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index 6397fe92e..4a3feae48 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -40,7 +40,7 @@ from dbt.adapters.events.types import SQLQuery from dbt_common.events.functions import fire_event from dbt.adapters.bigquery import __version__ as dbt_version -from dbt.adapters.bigquery.utility import is_base64, base64ToString +from dbt.adapters.bigquery.utility import is_base64, base64_to_string from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum @@ -335,7 +335,7 @@ def get_google_credentials(cls, profile_credentials) -> GoogleCredentials: elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT_JSON: details = profile_credentials.keyfile_json if is_base64(profile_credentials.keyfile_json): - details = base64ToString(details) + details = base64_to_string(details) return creds.from_service_account_info(details, scopes=profile_credentials.scopes) elif method == BigQueryConnectionMethod.OAUTH_SECRETS: diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py index 44d5fbf76..1ebf73c5d 100644 --- a/dbt/adapters/bigquery/utility.py +++ b/dbt/adapters/bigquery/utility.py @@ -75,9 +75,9 @@ def is_base64(s: str | bytes) -> bool: return False -def base64ToString(b): +def base64_to_string(b): return base64.b64decode(b).decode("utf-8") -def stringToBase64(s): +def string_to_base64(s): return base64.b64encode(s.encode("utf-8")) diff --git a/tests/conftest.py b/tests/conftest.py index 90ed47387..6dc9e6443 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ import pytest import os import json -from dbt.adapters.bigquery.utility import is_base64, base64ToString +from dbt.adapters.bigquery.utility import is_base64, base64_to_string # Import the fuctional fixtures as a plugin # Note: fixtures with session scope need to be local @@ -40,7 +40,7 @@ def oauth_target(): def service_account_target(): credentials_json_str = os.getenv("BIGQUERY_TEST_SERVICE_ACCOUNT_JSON").replace("'", "") if is_base64(credentials_json_str): - credentials_json_str = base64ToString(credentials_json_str) + credentials_json_str = base64_to_string(credentials_json_str) credentials = json.loads(credentials_json_str) project_id = credentials.get("project_id") return { diff --git a/tests/functional/adapter/test_json_keyfile.py b/tests/functional/adapter/test_json_keyfile.py index 58b9a107b..91e41a3f1 100644 --- a/tests/functional/adapter/test_json_keyfile.py +++ b/tests/functional/adapter/test_json_keyfile.py @@ -1,6 +1,6 @@ import json import pytest -from dbt.adapters.bigquery.utility import stringToBase64, is_base64 +from dbt.adapters.bigquery.utility import string_to_base64, is_base64 @pytest.fixture @@ -40,7 +40,7 @@ def example_json_keyfile_b64(): } ) - return stringToBase64(keyfile) + return string_to_base64(keyfile) def test_valid_base64_strings(example_json_keyfile_b64): From 63e2f0836ee5ac1a66624f806a3aadac2cd6a5d3 Mon Sep 17 00:00:00 2001 From: Robele Baker <> Date: Mon, 3 Jun 2024 07:30:37 -0700 Subject: [PATCH 5/5] change type hinting to be py3.9 compatible --- dbt/adapters/bigquery/utility.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py index 1ebf73c5d..557986b38 100644 --- a/dbt/adapters/bigquery/utility.py +++ b/dbt/adapters/bigquery/utility.py @@ -1,7 +1,7 @@ import base64 import binascii import json -from typing import Any, Optional +from typing import Any, Optional, Union import dbt_common.exceptions @@ -47,7 +47,7 @@ def sql_escape(string): return json.dumps(string)[1:-1] -def is_base64(s: str | bytes) -> bool: +def is_base64(s: Union[str, bytes]) -> bool: """ Checks if the given string or bytes object is valid Base64 encoded.