Skip to content

Commit

Permalink
Feat/artifact from dbt cloud (#67)
Browse files Browse the repository at this point in the history
* feat: add skeleton

* feat: artifact download implement

* docs: add guide of dbt cloud usage

* docs: dbt cloud cli ref

* test: 96%
  • Loading branch information
datnguye authored Jan 6, 2024
1 parent 5ba8122 commit 51714a4
Show file tree
Hide file tree
Showing 9 changed files with 286 additions and 0 deletions.
4 changes: 4 additions & 0 deletions dbterd/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dbterd import default
from dbterd.adapters import adapter
from dbterd.adapters.dbt_cloud import DbtCloudArtifact
from dbterd.adapters.dbt_invocation import DbtInvocation
from dbterd.adapters.filter import has_unsupported_rule
from dbterd.helpers import cli_messaging
Expand Down Expand Up @@ -57,6 +58,9 @@ def evaluate_kwargs(self, **kwargs) -> dict:
if kwargs.get("dbt_auto_artifacts"):
self.dbt.get_artifacts_for_erd()
artifacts_dir = f"{dbt_project_dir}/target"
elif kwargs.get("dbt_cloud"):
artifacts_dir = f"{dbt_project_dir}/target"
DbtCloudArtifact(**kwargs).get(artifacts_dir=artifacts_dir)
else:
unsupported, rule = has_unsupported_rule(
rules=select.extend(exclude) if exclude else select
Expand Down
101 changes: 101 additions & 0 deletions dbterd/adapters/dbt_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
import json

import click
import requests

from dbterd.helpers import file
from dbterd.helpers.log import logger


class DbtCloudArtifact:
    """Download dbt artifact files from a dbt Cloud job run.

    Uses the dbt Cloud Administrative API
    https://docs.getdbt.com/docs/dbt-cloud-apis/admin-cloud-api,
    specifically the Retrieve Run Artifact endpoint, for example, with v2 spec
    https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Run%20Artifact
    """

    # Seconds `requests` waits for the connection and for each chunk of the
    # response before giving up. Without it a stalled connection would block
    # the CLI forever. It is not a limit on the total download time.
    request_timeout = 30

    def __init__(self, **kwargs) -> None:
        """Initialize the base attributes to interact with API service

        Args:
            **kwargs: CLI options; only the `dbt_cloud_*` keys are read, any
                other key is ignored. Missing keys are stored as None.
        """
        self.host_url = kwargs.get("dbt_cloud_host_url")
        self.service_token = kwargs.get("dbt_cloud_service_token")
        self.account_id = kwargs.get("dbt_cloud_account_id")
        self.run_id = kwargs.get("dbt_cloud_run_id")
        self.api_version = kwargs.get("dbt_cloud_api_version")

    @property
    def request_headers(self) -> dict:
        """API Header carrying the service token"""
        return {"Authorization": f"Token {self.service_token}"}

    @property
    def api_endpoint(self) -> str:
        """Base API endpoint to a specific artifact object

        The returned string still contains a literal `{path}` placeholder,
        to be filled in with the artifact file name.
        """
        return (
            "https://{host_url}/api/{api_version}/"
            "accounts/{account_id}/"
            "runs/{run_id}/"
            "artifacts/{{path}}"
        ).format(
            host_url=self.host_url,
            api_version=self.api_version,
            account_id=self.account_id,
            run_id=self.run_id,
        )

    @property
    def manifest_api_endpoint(self) -> str:
        """Full API endpoint to the `manifest.json` file"""
        return self.api_endpoint.format(path="manifest.json")

    @property
    def catalog_api_endpoint(self) -> str:
        """Full API endpoint to the `catalog.json` file"""
        return self.api_endpoint.format(path="catalog.json")

    def download_artifact(self, artifact: str, artifacts_dir: str) -> bool:
        """Request API to download the artifact file

        Args:
            artifact (str): The artifact name e.g. manifest or catalog
            artifacts_dir (str): Local dir the `<artifact>.json` file is written to.
                It gets created if it does not exist yet.

        Returns:
            bool: True if success, False if the API did not return 200,
                the request failed, or the response body was not valid JSON
        """
        artifact_api_endpoint = getattr(self, f"{artifact}_api_endpoint")
        logger.info(f"Dowloading...[URL: {artifact_api_endpoint}]")
        try:
            r = requests.get(
                url=artifact_api_endpoint,
                headers=self.request_headers,
                timeout=self.request_timeout,
            )
            logger.info(f"Completed [status: {r.status_code}]")

            if r.status_code != 200:
                logger.error(f"Failed to retrieve artifacts [error: {vars(r)}]")
                return False

            # dbt Cloud users may never have run dbt locally, in which case
            # the target dir does not exist and the write below would fail.
            if artifacts_dir:
                os.makedirs(artifacts_dir, exist_ok=True)

            file.write_json(
                data=json.dumps(r.json(), indent=2),
                path=f"{artifacts_dir}/{artifact}.json",
            )
        except (
            click.BadParameter,
            # Connection errors, timeouts, invalid URLs, ...
            requests.exceptions.RequestException,
            # `r.json()` on a non-JSON body (older `requests` raise a plain ValueError)
            ValueError,
        ) as e:
            logger.error(f"Error occurred while downloading [error: {str(e)}]")
            return False

        return True

    def get(self, artifacts_dir: str = None) -> bool:
        """Download `manifest.json` and `catalog.json` to the local dir

        Args:
            artifacts_dir (str, optional): Local dir where the artifacts get downloaded to.
                Default to CWD/target.

        Returns:
            bool: True if both files were downloaded, False if any errors.
                `catalog.json` is not requested when `manifest.json` failed.
        """
        _artifacts_dir = artifacts_dir or f"{os.getcwd()}/target"
        # `all` short-circuits, so the catalog is skipped once the manifest fails
        return all(
            self.download_artifact(artifact=name, artifacts_dir=_artifacts_dir)
            for name in ("manifest", "catalog")
        )
59 changes: 59 additions & 0 deletions dbterd/cli/params.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import functools

import click
Expand Down Expand Up @@ -112,6 +113,64 @@ def common_params(func):
show_default=True,
type=click.STRING,
)
@click.option(
"--dbt-cloud",
help=(
"Flag to download dbt artifact files using dbt Cloud API. "
"This requires the additional parameters to be able to connection to dbt Cloud API"
),
is_flag=True,
default=False,
show_default=True,
)
@click.option(
"--dbt-cloud-host-url",
help=(
"Configure dbt Cloud's Host URL. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_HOST_URL) if not specified. "
"Sample dbt Cloud Run URL: "
"https://<HOST_URL>/deploy/<ACCOUNT_ID>/projects/irrelevant/runs/<RUN_ID>"
),
default=os.environ.get("DBTERD_DBT_CLOUD_HOST_URL", "cloud.getdbt.com"),
show_default=True,
)
@click.option(
"--dbt-cloud-account-id",
help=(
"Configure dbt Cloud's Account ID. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_ACCOUNT_ID) if not specified"
),
default=os.environ.get("DBTERD_DBT_CLOUD_ACCOUNT_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-run-id",
help=(
"Configure dbt Cloud's completed Run ID. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_RUN_ID) if not specified"
),
default=os.environ.get("DBTERD_DBT_CLOUD_RUN_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-service-token",
help=(
"Configure dbt Service Token (Permissions: Job Admin). "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_SERVICE_TOKEN) if not specified. "
"Visit https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens to see how to generate it. "
),
default=os.environ.get("DBTERD_DBT_CLOUD_SERVICE_TOKEN"),
show_default=True,
)
@click.option(
"--dbt-cloud-api-version",
help=(
"Configure dbt Cloud Administrative API version. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_API_VERSION) if not specified."
),
default=os.environ.get("DBTERD_DBT_CLOUD_API_VERSION", "v2"),
show_default=True,
)
@functools.wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs) # pragma: no cover
Expand Down
11 changes: 11 additions & 0 deletions dbterd/helpers/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,14 @@ def read_catalog(path: str, version: int = None):
parser_version = default_parser
parse_func = getattr(parser, parser_version)
return parse_func(catalog=_dict)


def write_json(data, path: str):
    """Write already-serialized JSON text to a file, overwriting it.

    Args:
        data (str): JSON content as text (e.g. the output of `json.dumps`);
            it is written as-is, no serialization happens here
        path (str): File path to write to
    """
    with open(path, "w") as handle:
        handle.write(data)
14 changes: 14 additions & 0 deletions docs/nav/guide/cli-references.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,20 @@ Currently, it supports the following keys in the format:
dbterd run --entity-name-format table # with table name only
```

### dbterd run --dbt-cloud

Decide to download artifact files from dbt Cloud Job Run instead of compiling locally.

Check [Download artifacts from a Job Run](./dbt-cloud/download-artifact-from-a-job-run.md) for more details.

**Examples:**
=== "CLI"

```bash
dbterd run --dbt-cloud
dbterd run --dbt-cloud --select wildcard:*transaction*
```

## dbterd debug

Shows hidden configured values, which will help us to see what configs are passed into and how they are evaluated to be used.
Expand Down
76 changes: 76 additions & 0 deletions docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Download artifacts from a Job Run

This guide shows how to download `manifest.json` and `catalog.json` from a Job Run using the [dbt Cloud Administrative API](https://docs.getdbt.com/docs/dbt-cloud-apis/admin-cloud-api), via the [Retrieve Run Artifact](https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Run%20Artifact) endpoint. As a result, we no longer need to run `dbt docs generate` locally.

To support dbt Cloud users, `dbterd` now has multiple CLI options prefixed with `--dbt-cloud` that let us configure the connection to a completed dbt Cloud Job Run.

!!! note "Prerequisites"
- You have a dbt Cloud account on a [Team or Enterprise plan](https://www.getdbt.com/pricing/) 💰
- You have a job or go create a new job with a single step 🏃

```bash
dbt docs generate
```
- Make sure that you have at least 1 successful run ✅

## 1. Prepare the environment variables

Behind the scenes, the API endpoint will look like: `https://{host_url}/api/{api_version}/accounts/{account_id}/runs/{run_id}/artifacts/{path}`.

And the dbt Cloud Job Run will have a URL constructed as `https://<host_url>/deploy/<account_id>/projects/irrelevant/runs/<run_id>`.

In the above:

| URL Part | Environment Variable | CLI Option | Description |
|-------------------|---------------------------------|---------------------------|---------------------------------------------------------------------------|
| `host_url` | `DBTERD_DBT_CLOUD_HOST_URL` | `--dbt-cloud-host-url` | Host URL, also known as [Access URL](https://docs.getdbt.com/docs/cloud/about-cloud/regions-ip-addresses) (Default to `cloud.getdbt.com`) |
| `account_id` | `DBTERD_DBT_CLOUD_ACCOUNT_ID` | `--dbt-cloud-account-id` | dbt Cloud Account ID |
| `run_id` | `DBTERD_DBT_CLOUD_RUN_ID` | `--dbt-cloud-run-id` | dbt Cloud successful job run ID |
| `api_version` | `DBTERD_DBT_CLOUD_API_VERSION` | `--dbt-cloud-api-version` | dbt Cloud API version (Default to `v2`) |
| `path` | `N/A` | `N/A` | Artifact relative file path. You don't need to care about this part as `dbterd` managed it already |

Besides, we need another one which is very important, the service token:

- Go to **Account settings** / **Service tokens**. Click _+ New token_
- Enter _Service token name_ e.g. "ST_dbterd"
- Click _Add_ and select `Job Admin` permission. Optionally, select the right project or all by default
- Click _Save_
- Copy token & Pass it to the Environment Variable (`DBTERD_DBT_CLOUD_SERVICE_TOKEN`) or the CLI Option (`--dbt-cloud-service-token`)

Finally, fill in `your_value` and execute the (Linux or macOS) commands below:

```bash
export DBTERD_DBT_CLOUD_SERVICE_TOKEN=your_value
export DBTERD_DBT_CLOUD_HOST_URL=your_value
export DBTERD_DBT_CLOUD_ACCOUNT_ID=your_value
export DBTERD_DBT_CLOUD_RUN_ID=your_value
export DBTERD_DBT_CLOUD_API_VERSION=your_value
```

## 2. Generate ERD file

We're going to use `--dbt-cloud` option to tell `dbterd` to use dbt Cloud API with all above variables.

The command will look like:

```bash
dbterd run -s <dbterd selection> --dbt-cloud
```

> NOTE: You cannot use the `--dbt` option together with `--dbt-cloud`.

Here is the sample console log:

```log
dbterd - INFO - Run with dbterd==1.0.0 (main.py:54)
dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/manifest.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/catalog.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73)
dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59)
dbterd - INFO - C:\Sources\dbterd\target/output.dbml (base.py:170)
```

Voila! Happy ERD 🎉!
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ nav:
- Metadata:
- Ignore Tests: nav/metadata/ignore_in_erd.md
- Relationship Types: nav/metadata/relationship_type.md
- dbt Cloud:
- Download artifacts from a Job Run: nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
- Development:
- Contribution Guideline: nav/development/contributing-guide.md
- Change Log: nav/development/CHANGELOG.md
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/adapters/test_dbt_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# from unittest import mock

# import click
# import pytest

# from dbterd.adapters.dbt_cloud import DbtCloudArtifact


class TestDbtCloudArtifact:
    """Placeholder suite for `DbtCloudArtifact`; nothing is asserted yet."""

    def test_download_artifact(self):
        """TODO: cover `download_artifact` (currently an empty placeholder)."""

    def test_get(self):
        """TODO: cover `get` (currently an empty placeholder)."""
5 changes: 5 additions & 0 deletions tests/unit/helpers/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,8 @@ def test_read_catalog_error(self, mock_open_json, version):
with pytest.raises(ValueError):
file.read_catalog(path="path/to/catalog", version=version)
mock_open_json.assert_called_with("path/to/catalog/catalog.json")

@mock.patch("builtins.open")
def test_write_json(self, mock_open):
    """`write_json` opens the path for writing and writes the text unchanged."""
    # `write_json` expects serialized JSON text, not a dict
    file.write_json(data="{}", path="path/to/catalog/catalog.json")
    mock_open.assert_called_with("path/to/catalog/catalog.json", "w")
    # The handle bound by the `with` statement is what receives the data
    handle = mock_open.return_value.__enter__.return_value
    handle.write.assert_called_once_with("{}")

0 comments on commit 51714a4

Please sign in to comment.