diff --git a/scripts/ddl/README.md b/scripts/ddl/README.md new file mode 100644 index 000000000..da1399b2a --- /dev/null +++ b/scripts/ddl/README.md @@ -0,0 +1,42 @@ +# Structure of the CI database (`dbt-test-env`) + +## `adapter` + +### Objects + +- [schema](adapter/schema.sql) +- views + - [test_schema](adapter/views/test_schema.sql) + - [test_schema_summary](adapter/views/test_schema_summary.sql) +- procedures + - [drop_test_schemas](adapter/procedures/drop_test_schemas.sql) + +> Please refer to each DDL file, linked for convenience, for details about the particular object + +### Description + +The `adapter` schema contains objects that facilitate the maintenance of the test environment. +The following tasks can be accomplished within this environment: +- identify orphan test schemas +- identify integration tests that are creating orphan schemas +- drop orphan schemas + +## `test%_test_%` + +These schemas are temporary schemas created in the course of integration testing. +They are of the format `test_`, e.g. `test12345678901234567890_test_basic`. +The `unique-hash` part is a combination of a timestamp and a random four-digit integer. +The `test-item` part is either a test module (usually), test class, or test function. + +## `dbt_` + +These schemas are user workspaces that allow for development and troubleshooting. +Each schema is up to the user for maintaining and there are no scripts persisted in this repo. + +## Additional schemas out of scope of this application + +- `always_sunny_%`: These schemas are used by `always-sunny`, and contain persistent objects +- `dbt_cloud_pr_%`: Used by Cloud's CI process? +- `us_q_team_pristine_prod_%`: Used by Quality Team's Pristine setups? +- `dbt_us_bigquery_%` +- `semantic_layer_%` diff --git a/scripts/ddl/adapter/procedures/drop_test_schemas.sql b/scripts/ddl/adapter/procedures/drop_test_schemas.sql new file mode 100644 index 000000000..2c16dfc75 --- /dev/null +++ b/scripts/ddl/adapter/procedures/drop_test_schemas.sql @@ -0,0 +1,15 @@ +/* +This procedure will drop every test schema, as defined by `adapter.test_schema`, for which it has access. +In the event this procedure does not have access, it sill simply print out the name of the schema and continue. +*/ +CREATE OR REPLACE PROCEDURE adapter.drop_test_schemas() +BEGIN + FOR test_schema IN (SELECT * FROM adapter.test_schema) + DO + BEGIN + EXECUTE IMMEDIATE CONCAT("DROP SCHEMA IF EXISTS `", test_schema.schema_name, "` CASCADE"); + EXCEPTION WHEN ERROR THEN + SELECT CONCAT("Encountered an error when dropping `", test_schema.schema_name, "`. Skipping."); + END; + END FOR; +END; diff --git a/scripts/ddl/adapter/schema.sql b/scripts/ddl/adapter/schema.sql new file mode 100644 index 000000000..89532c342 --- /dev/null +++ b/scripts/ddl/adapter/schema.sql @@ -0,0 +1 @@ +CREATE SCHEMA `dbt-test-env`.adapter; diff --git a/scripts/ddl/adapter/views/test_schema.sql b/scripts/ddl/adapter/views/test_schema.sql new file mode 100644 index 000000000..9497a741d --- /dev/null +++ b/scripts/ddl/adapter/views/test_schema.sql @@ -0,0 +1,16 @@ +/* +This view identifies all test schemas that `dbt-bigquery` creates in the course of running integration tests. +Ideally this view will return no records as test schemas should be ephemeral in nature. +However, there are many scenarios where a test schema will be left behind. The two most common are: +- the fixture simply does not delete it when it's supposed to +- the connection is broken +*/ +CREATE OR REPLACE VIEW adapter.test_schema AS + SELECT * + FROM `dbt-test-env`.INFORMATION_SCHEMA.SCHEMATA + WHERE ( + schema_name LIKE 'test%_test_%' OR -- e.g. test12345678901234567890_test_basic + REGEXP_CONTAINS(schema_name, r'^\d{13}$') -- e.g. 1234567890123 + ) + AND catalog_name = 'dbt-test-env' +; diff --git a/scripts/ddl/adapter/views/test_schema_summary.sql b/scripts/ddl/adapter/views/test_schema_summary.sql new file mode 100644 index 000000000..98815d764 --- /dev/null +++ b/scripts/ddl/adapter/views/test_schema_summary.sql @@ -0,0 +1,16 @@ +/* +This view groups test schemas by the integration test that created them. +The goal is to identify any integration tests which may not be properly tearing down their test schema. +*/ +CREATE OR REPLACE VIEW adapter.test_schema_summary AS + SELECT + CASE + WHEN LENGTH(schema_name) > 13 + THEN RIGHT(schema_name, LENGTH(schema_name)-LENGTH('test12345678901234567890_')) + ELSE '13-digit strings' + END AS integration_test, + COUNT(schema_name) AS schema_count + FROM adapter.test_schema + GROUP BY 1 + ORDER BY 2 DESC +;