Merge pull request #1371 from cityofaustin/15263_add_new_lkp_table_values

Add new lookup tables and values to get VZDB up to date with CRIS v27
johnclary authored Feb 29, 2024
2 parents a9bed0c + e880ade commit 4db4406
Showing 16 changed files with 1,096 additions and 0 deletions.
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/.gitignore
@@ -0,0 +1,3 @@
env
extract
*.sql
23 changes: 23 additions & 0 deletions atd-toolbox/get_lookup_table_changes/README.md
@@ -0,0 +1,23 @@
The script in this folder brings our lookup tables up to date with the latest lookup export from CRIS. We will probably need to run it after every major CRIS release.

### Install packages

Create or activate your Python virtual environment using [venv](https://docs.python.org/3/library/venv.html).

Install the packages from the `requirements.txt` file:

`pip install -r requirements.txt`
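
For example, to create and activate an environment named `env` (a name this folder's `.gitignore` already ignores):

```shell
python3 -m venv env
source env/bin/activate
```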

### Env file

Rename `env_template` to `env`.

Fill in the values using your credentials for the VZ read replica.
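
A filled-in `env` might look like this (hypothetical values; the script passes `DB_SSL_REQUIREMENT` to psycopg2 as its `sslmode`, so values such as `require` are expected):

```
DB_HOST=vz-read-replica.example.com
DB_USER=vz_user
DB_PASS=example-password
DB_NAME=vz_database
DB_SSL_REQUIREMENT=require
```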

### Running the script

To run this script, you need a recent CRIS lookup table export CSV; provide its file path as a command-line argument like so:

`python3 get_lookup_table_changes.py --input path_to_extract.csv`

Running `get_lookup_table_changes.py` can take a while, so sit tight. Once the script is done, a file called `up_migration.sql` will be created in this directory containing all of the SQL commands generated by the run. The contents of this file can then be used to create a migration in the Hasura console so we can track these large changes. A file called `down_migration.sql` is also created in this directory, which you can use as the down migration.
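
For reference, the script expects the extract to be a comma-separated file whose rows (after a header row, which is skipped) each hold the lookup table name, the lookup ID, and the description. A hypothetical excerpt:

```
AUTONOMOUS_UNIT_ID,1,AUTOMATED DRIVING SYSTEM
AUTONOMOUS_UNIT_ID,2,NO AUTOMATION
```

Based on the SQL templates in the script, the generated `up_migration.sql` then contains statements along these lines (illustrative values):

```sql
-- Changes to table atd_txdot__autonomous_unit_lkp
insert into public.atd_txdot__autonomous_unit_lkp (autonomous_unit_id, autonomous_unit_desc) values (2, 'NO AUTOMATION');
update public.atd_txdot__autonomous_unit_lkp set autonomous_unit_desc = 'AUTOMATED DRIVING SYSTEM' where autonomous_unit_id = 1;
```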
5 changes: 5 additions & 0 deletions atd-toolbox/get_lookup_table_changes/env_template
@@ -0,0 +1,5 @@
DB_HOST=
DB_USER=
DB_PASS=
DB_NAME=
DB_SSL_REQUIREMENT=
211 changes: 211 additions & 0 deletions atd-toolbox/get_lookup_table_changes/get_lookup_table_changes.py
@@ -0,0 +1,211 @@
#!/usr/bin/env python

import csv
import json
import re
import os
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
import argparse

load_dotenv("env")

DB_HOST = os.getenv("DB_HOST")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
DB_NAME = os.getenv("DB_NAME")
DB_SSL_REQUIREMENT = os.getenv("DB_SSL_REQUIREMENT")


def get_pg_connection():
    """
    Returns a connection to the Postgres database
    """
    return psycopg2.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASS,
        dbname=DB_NAME,
        sslmode=DB_SSL_REQUIREMENT,
        # CA bundle used to verify the server certificate; this path assumes
        # the RDS bundle has been downloaded to /root
        sslrootcert="/root/rds-combined-ca-bundle.pem",
    )


def table_exists(conn, table_name):
    """
    Checks if a table exists in a PostgreSQL database.

    Args:
        conn (psycopg2.extensions.connection): A connection to the PostgreSQL database.
        table_name (str): The name of the table to check for existence.

    Returns:
        bool: True if the table exists, False otherwise.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name = %s
                );
                """,
                (table_name,),
            )

            result = cur.fetchone()
            return result[0]

    except Exception as e:
        print(f"Error checking table existence: {e}")
        return False


def read_and_group_csv(file_path):
    """
    Returns a dict where each key is a lookup table name and the value
    is a list of all the lookup ids/descriptions for that lookup table
    """
    grouped_data = {}

    with open(file_path, newline="") as csvfile:
        csvreader = csv.reader(csvfile, delimiter=",", quotechar='"')

        # Skip the first row (header)
        next(csvreader)

        # Each remaining row holds: lookup table name, lookup id, description
        for row in csvreader:
            key = row[0]
            inner_dict = {"id": int(row[1]), "description": row[2]}

            if key not in grouped_data:
                grouped_data[key] = []

            grouped_data[key].append(inner_dict)

    return grouped_data


def escape_single_quotes(input_string):
    return input_string.replace("'", "''")


def new_table(name):
    return f"""
create table public.atd_txdot__{name}_lkp (
    id serial primary key,
    {name}_id integer not null,
    {name}_desc varchar(255) not null
);
"""


def main(file_path):
    data = read_and_group_csv(file_path)

    # Pretty-print the grouped data as JSON
    # print(json.dumps(data, indent=4))

    pg = get_pg_connection()
    cursor = pg.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    changes = []
    down_changes = []

    for table in data:
        # These tables are special cases:
        # the states (as in United States) lookup is non-uniform and does not
        # need inspection, and the counties lookup is equally fixed.
        if table in ["STATE_ID", "CNTY_ID"]:
            continue

        match = re.search(r"(^.*)_ID$", table)
        if not match:
            # Skip any column that doesn't follow the *_ID naming convention
            continue
        name_component = match.group(1).lower()

        print()
        print("👀 Looking into table: ", name_component)

        table_name = "atd_txdot__" + name_component + "_lkp"
        exists = table_exists(pg, table_name)
        if not exists:
            print("💥 Missing table: ", table_name)
            changes.append(f"\n-- Adding table {table_name}")
            changes.append(new_table(name_component))
            down_changes.append(f"\n-- Dropping table {table_name}")
            new_table_down = f"drop table if exists public.{table_name};"
            down_changes.append(new_table_down)
            for record in data[table]:
                # We do not have a record on file with this ID
                print(f"❓ Id {str(record['id'])} not found in {table_name}")
                print(" CSV Value: ", record["description"])
                print()
                insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                changes.append(insert)
                # Don't need down changes here because the down migration just drops the table

        else:
            is_first_change = True
            for record in data[table]:
                sql = f"""
                select {name_component}_id as id, {name_component}_desc as description
                from {table_name} where {name_component}_id = {str(record['id'])};
                """
                cursor.execute(sql)
                db_result = cursor.fetchone()
                if db_result:
                    # We have a record on file with this ID
                    if db_result["description"] == record["description"]:
                        # print(f"✅ Value \"{record['description']}\" with id {str(record['id'])} found in {table_name}")
                        pass
                    else:
                        print(
                            f"❌ Id {str(record['id'])} found in {table_name} has a description mismatch:"
                        )
                        print(" CSV Value: ", record["description"])
                        print(" DB Value: ", db_result["description"])
                        print()
                        update = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(record['description'])}' where {name_component}_id = {str(record['id'])};"
                        if is_first_change:
                            changes.append(f"\n-- Changes to table {table_name}")
                            down_changes.append(f"\n-- Changes to table {table_name}")
                        changes.append(update)
                        # Escape the old description too so the down migration is valid SQL
                        update_down = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(db_result['description'])}' where {name_component}_id = {str(record['id'])};"
                        down_changes.append(update_down)
                        is_first_change = False
                else:
                    # We do not have a record on file with this ID
                    # print(f"Value \"{record['description']}\" with id {str(record['id'])} not found in {table_name}")
                    print(f"❓ Id {str(record['id'])} not found in {table_name}")
                    print(" CSV Value: ", record["description"])
                    print()
                    insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                    if is_first_change:
                        changes.append(f"\n-- Changes to table {table_name}")
                        down_changes.append(f"\n-- Changes to table {table_name}")
                    changes.append(insert)
                    insert_down = f"delete from public.{table_name} where {name_component}_id = {str(record['id'])};"
                    down_changes.append(insert_down)
                    is_first_change = False

    print("\n🛠️ Here are the changes to be made:\n")
    print("\n".join(changes).strip())

    with open("up_migration.sql", "w") as outfile:
        outfile.write("\n".join(changes).strip())

    with open("down_migration.sql", "w") as outfile_down:
        outfile_down.write("\n".join(down_changes).strip())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="extract file path", required=True)
    args = parser.parse_args()
    file_path = args.input

    main(file_path)
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/requirements.txt
@@ -0,0 +1,3 @@
psycopg2==2.9.3
psycopg2_binary==2.9.5
python-dotenv==1.0.1
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__autonomous_level_engaged_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__autonomous_level_engaged_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__autonomous_unit_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__autonomous_unit_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__inv_da_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__inv_da_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__rpt_road_type_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__rpt_road_type_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__trauma_centers_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__trauma_centers_lkp
schema: public
@@ -140,6 +140,7 @@ insert_permissions:
- ownr_state_id
- ownr_zip
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -259,6 +260,7 @@ select_permissions:
- pedalcyclist_action_id
- pedestrian_action_id
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -368,6 +370,7 @@ select_permissions:
- ownr_state_id
- ownr_zip
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -488,6 +491,7 @@ update_permissions:
- pedalcyclist_action_id
- pedestrian_action_id
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
2 changes: 2 additions & 0 deletions atd-vzd/metadata/databases/default/tables/tables.yaml
@@ -22,9 +22,11 @@
- "!include public_atd_txdot__prsn_type_lkp.yaml"
- "!include public_atd_txdot__road_part_lkp.yaml"
- "!include public_atd_txdot__road_type_lkp.yaml"
- "!include public_atd_txdot__rpt_road_type_lkp.yaml"
- "!include public_atd_txdot__rwy_sys_lkp.yaml"
- "!include public_atd_txdot__street_sfx_lkp.yaml"
- "!include public_atd_txdot__traffic_cntl_lkp.yaml"
- "!include public_atd_txdot__trauma_centers_lkp.yaml"
- "!include public_atd_txdot__trvl_dir_lkp.yaml"
- "!include public_atd_txdot__veh_body_styl_lkp.yaml"
- "!include public_atd_txdot__veh_make_lkp.yaml"