-
Notifications
You must be signed in to change notification settings - Fork 5
149 lines (112 loc) · 6.66 KB
/
pull_request_build.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
name: "Test and Check on Pull Request"

on:  # yamllint disable-line rule:truthy
  # Triggered by pull requests that change files under transform/.
  pull_request:
    paths:
      - "transform/*"
      - "transform/**/*"

  # Also exposes a "Run workflow" button in the Actions tab.
  workflow_dispatch:

# One active run per workflow + ref: a newer push to the same branch
# cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Works out which top-level folders the pull request touches and publishes
  # the result as job outputs. The `steps` context cannot be used in a
  # job-level `if`, so the dbt and airflow jobs read these outputs through
  # `needs.changes.outputs.*` instead.
  changes:
    name: Detect changed folders
    if: ${{ github.event_name == 'pull_request' }}
    runs-on: ubuntu-latest
    outputs:
      files_contains_transform: ${{ steps.changed_files.outputs.files_contains_transform }}
      files_contains_orchestrate: ${{ steps.changed_files.outputs.files_contains_orchestrate }}
    steps:
      - name: Checkout branch
        uses: actions/checkout@v3.5.0
        with:
          # Full history so origin/<base> is available to diff against.
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Check for transform and orchestrate changes
        id: changed_files
        env:
          # Passed through env rather than interpolated into the script.
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          # Files changed on the PR branch since it diverged from the base.
          changed="$(git diff --name-only "origin/${BASE_REF}...HEAD")"
          for folder in transform orchestrate; do
            if printf '%s\n' "$changed" | grep -q "^${folder}/"; then
              echo "files_contains_${folder}=true" >> "$GITHUB_OUTPUT"
            else
              echo "files_contains_${folder}=false" >> "$GITHUB_OUTPUT"
            fi
          done

  # Builds and tests the dbt project against a per-PR database.
  dbt:
    needs: changes
    # Pull requests only, and only when files under transform/ changed.
    if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.files_contains_transform == 'true' }}
    name: Pull Request dbt Tests
    runs-on: ubuntu-latest

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, you can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV

    # most people should use this one
    container: datacoves/ci-basic-dbt-snowflake:3.2

    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
      DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # The database name is suffixed with the pull request number.
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_PR_${{ github.event.number }}
      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}

      # This is used by datacoves to drop the test database if permissions
      # cannot be applied when using the Datacoves permifrost security model.
      DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}

    steps:
      - name: Checkout branch
        uses: actions/checkout@v3.5.0
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: List of files changed
        run: "git diff origin/${{ github.event.pull_request.base.ref }} HEAD --name-status"

      - name: Install dbt packages
        run: "dbt deps"

      - name: Create PR database
        run: "dbt --no-write-json run-operation create_database"

      # The slim/full steps below branch on this step's `manifest_found` output.
      - name: Get prod manifest
        id: prod_manifest
        run: "../automate/dbt/get_artifacts.sh"

      ##### Governance Checks
      # this first runs dbt but creates empty tables, this is enough to then run the hooks and fail fast
      # Slim mode: a prod manifest was found and the PR has no 'full-refresh' label.
      - name: Governance run of dbt with EMPTY models using slim mode
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
        run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty"

      # Full run: no prod manifest, or the PR carries the 'full-refresh' label.
      - name: Governance run of dbt with EMPTY models using full run
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
        run: "dbt build --fail-fast --empty"

      - name: Generate Docs Combining Prod and branch catalog.json
        run: "dbt-coves generate docs --merge-deferred --state logs"

      - name: Run governance checks
        run: "pre-commit run --from-ref origin/${{ github.event.pull_request.base.ref }} --to-ref HEAD"

      ##### Real dbt run given that we passed governance checks
      - name: Run dbt build slim mode
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
        run: "dbt build --fail-fast --defer --state logs --select state:modified+"

      - name: Run dbt build full run
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
        run: "dbt build --fail-fast"

      - name: Grant access to PR database
        id: grant-access-to-database
        run: "dbt --no-write-json run-operation grant_access_to_pr_database"

      # We drop the database when there is a failure to grant access to the db because
      # most likely the schema was not set properly in dbt_project.yml so models built to default schema
      # always() lets this step be evaluated even after an earlier step failed.
      - name: Drop PR database on Failure to grant security access
        if: always() && (env.DATACOVES__DROP_DB_ON_FAIL == 'true') && (steps.grant-access-to-database.outcome == 'failure')
        run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'"  # yamllint disable-line rule:line-length

  # Loads the Airflow DagBag and checks the DAG definitions.
  airflow:
    needs: changes
    # Pull requests only, and only when files under orchestrate/ changed.
    # NOTE(review): the workflow's pull_request path filter only lists
    # transform/, so this job can only run for PRs that touch both transform/
    # and orchestrate/ - confirm that this is the intended trigger.
    if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.files_contains_orchestrate == 'true' }}
    name: Pull Request Airflow Tests
    runs-on: ubuntu-latest

    container: datacoves/ci-airflow-dbt-snowflake:3.2

    env:
      AIRBYTE__EXTRACT_LOCATION: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/load
      AIRFLOW__CORE__DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/airflow/dags
      AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT: 300

    steps:
      - name: Checkout branch
        uses: actions/checkout@v3.5.0
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Test DAG structure integrity (DagBag Loading)
        run: "python /usr/app/load_dagbag.py"

      - name: Test DBT Sources against DAGs' YAML files
        run: "python /usr/app/test_dags.py --dag-loadtime-threshold 1 --check-variable-usage"