Merge pull request #1371 from cityofaustin/15263_add_new_lkp_table_values

Add new lookup tables and values to get VZDB up to date with CRIS v27
johnclary authored Feb 29, 2024
2 parents a9bed0c + e880ade commit 4db4406
Showing 16 changed files with 1,096 additions and 0 deletions.
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/.gitignore
@@ -0,0 +1,3 @@
env
extract
*.sql
23 changes: 23 additions & 0 deletions atd-toolbox/get_lookup_table_changes/README.md
@@ -0,0 +1,23 @@
The script in this folder brings our lookup tables up to date with the latest lookup export from CRIS. We will probably need to run it after every major CRIS release.

### Install packages

Create or activate your Python virtual environment using [venv](https://docs.python.org/3/library/venv.html).

Install the packages from the `requirements.txt` file:

`pip install -r requirements.txt`
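
For example, to create and activate an environment named `env` (a name this folder's `.gitignore` already ignores):

```shell
python3 -m venv env
source env/bin/activate
```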

### Env file

Rename `env_template` to `env`.

Fill in the values using your credentials for the VZ read replica.
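
A filled-in `env` might look like this (hypothetical values; the script passes `DB_SSL_REQUIREMENT` to psycopg2 as its `sslmode`, so values such as `require` are expected):

```
DB_HOST=vz-read-replica.example.com
DB_USER=vz_user
DB_PASS=example-password
DB_NAME=vz_database
DB_SSL_REQUIREMENT=require
```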

### Running the script

To run this script, you need a recent CRIS lookup table export CSV; provide its file path as a command-line argument like so:

`python3 get_lookup_table_changes.py --input path_to_extract.csv`

Running `get_lookup_table_changes.py` can take a while, so sit tight. Once the script is done, a file called `up_migration.sql` will be created in this directory containing all of the SQL commands generated by the run. The contents of this file can then be used to create a migration in the Hasura console so we can track these large changes. A file called `down_migration.sql` is also created in this directory, which you can use as the down migration.
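
For reference, the script expects the extract to be a comma-separated file whose rows (after a header row, which is skipped) each hold the lookup table name, the lookup ID, and the description. A hypothetical excerpt:

```
AUTONOMOUS_UNIT_ID,1,AUTOMATED DRIVING SYSTEM
AUTONOMOUS_UNIT_ID,2,NO AUTOMATION
```

Based on the SQL templates in the script, the generated `up_migration.sql` then contains statements along these lines (illustrative values):

```sql
-- Changes to table atd_txdot__autonomous_unit_lkp
insert into public.atd_txdot__autonomous_unit_lkp (autonomous_unit_id, autonomous_unit_desc) values (2, 'NO AUTOMATION');
update public.atd_txdot__autonomous_unit_lkp set autonomous_unit_desc = 'AUTOMATED DRIVING SYSTEM' where autonomous_unit_id = 1;
```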
5 changes: 5 additions & 0 deletions atd-toolbox/get_lookup_table_changes/env_template
@@ -0,0 +1,5 @@
DB_HOST=
DB_USER=
DB_PASS=
DB_NAME=
DB_SSL_REQUIREMENT=
211 changes: 211 additions & 0 deletions atd-toolbox/get_lookup_table_changes/get_lookup_table_changes.py
@@ -0,0 +1,211 @@
#!/usr/bin/env python

import csv
import json
import re
import os
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
import argparse

load_dotenv("env")

DB_HOST = os.getenv("DB_HOST")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
DB_NAME = os.getenv("DB_NAME")
DB_SSL_REQUIREMENT = os.getenv("DB_SSL_REQUIREMENT")


def get_pg_connection():
    """
    Returns a connection to the Postgres database
    """
    return psycopg2.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASS,
        dbname=DB_NAME,
        sslmode=DB_SSL_REQUIREMENT,
        # CA bundle used to verify the server certificate; this path assumes
        # the RDS bundle has been downloaded to /root
        sslrootcert="/root/rds-combined-ca-bundle.pem",
    )


def table_exists(conn, table_name):
    """
    Checks if a table exists in a PostgreSQL database.

    Args:
        conn (psycopg2.extensions.connection): A connection to the PostgreSQL database.
        table_name (str): The name of the table to check for existence.

    Returns:
        bool: True if the table exists, False otherwise.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name = %s
                );
                """,
                (table_name,),
            )

            result = cur.fetchone()
            return result[0]

    except Exception as e:
        print(f"Error checking table existence: {e}")
        return False


def read_and_group_csv(file_path):
    """
    Returns a dict where each key is a lookup table name and the value
    is a list of all the lookup ids/descriptions for that lookup table
    """
    grouped_data = {}

    with open(file_path, newline="") as csvfile:
        csvreader = csv.reader(csvfile, delimiter=",", quotechar='"')

        # Skip the first row (header)
        next(csvreader)

        # Each remaining row holds: lookup table name, lookup id, description
        for row in csvreader:
            key = row[0]
            inner_dict = {"id": int(row[1]), "description": row[2]}

            if key not in grouped_data:
                grouped_data[key] = []

            grouped_data[key].append(inner_dict)

    return grouped_data


def escape_single_quotes(input_string):
    return input_string.replace("'", "''")


def new_table(name):
    return f"""
create table public.atd_txdot__{name}_lkp (
    id serial primary key,
    {name}_id integer not null,
    {name}_desc varchar(255) not null
);
"""


def main(file_path):
    data = read_and_group_csv(file_path)

    # Pretty-print the grouped data as JSON
    # print(json.dumps(data, indent=4))

    pg = get_pg_connection()
    cursor = pg.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    changes = []
    down_changes = []

    for table in data:
        # These tables are special cases:
        # the states (as in United States) lookup is non-uniform and does not
        # need inspection, and the counties lookup is equally fixed.
        if table in ["STATE_ID", "CNTY_ID"]:
            continue

        match = re.search(r"(^.*)_ID$", table)
        if not match:
            # Skip any column that doesn't follow the *_ID naming convention
            continue
        name_component = match.group(1).lower()

        print()
        print("👀 Looking into table: ", name_component)

        table_name = "atd_txdot__" + name_component + "_lkp"
        exists = table_exists(pg, table_name)
        if not exists:
            print("💥 Missing table: ", table_name)
            changes.append(f"\n-- Adding table {table_name}")
            changes.append(new_table(name_component))
            down_changes.append(f"\n-- Dropping table {table_name}")
            new_table_down = f"drop table if exists public.{table_name};"
            down_changes.append(new_table_down)
            for record in data[table]:
                # We do not have a record on file with this ID
                print(f"❓ Id {str(record['id'])} not found in {table_name}")
                print(" CSV Value: ", record["description"])
                print()
                insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                changes.append(insert)
                # Don't need down changes here because the down migration just drops the table

        else:
            is_first_change = True
            for record in data[table]:
                sql = f"""
                select {name_component}_id as id, {name_component}_desc as description
                from {table_name} where {name_component}_id = {str(record['id'])};
                """
                cursor.execute(sql)
                db_result = cursor.fetchone()
                if db_result:
                    # We have a record on file with this ID
                    if db_result["description"] == record["description"]:
                        # print(f"✅ Value \"{record['description']}\" with id {str(record['id'])} found in {table_name}")
                        pass
                    else:
                        print(
                            f"❌ Id {str(record['id'])} found in {table_name} has a description mismatch:"
                        )
                        print(" CSV Value: ", record["description"])
                        print(" DB Value: ", db_result["description"])
                        print()
                        update = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(record['description'])}' where {name_component}_id = {str(record['id'])};"
                        if is_first_change:
                            changes.append(f"\n-- Changes to table {table_name}")
                            down_changes.append(f"\n-- Changes to table {table_name}")
                        changes.append(update)
                        # Escape the old description too so the down migration is valid SQL
                        update_down = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(db_result['description'])}' where {name_component}_id = {str(record['id'])};"
                        down_changes.append(update_down)
                        is_first_change = False
                else:
                    # We do not have a record on file with this ID
                    # print(f"Value \"{record['description']}\" with id {str(record['id'])} not found in {table_name}")
                    print(f"❓ Id {str(record['id'])} not found in {table_name}")
                    print(" CSV Value: ", record["description"])
                    print()
                    insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                    if is_first_change:
                        changes.append(f"\n-- Changes to table {table_name}")
                        down_changes.append(f"\n-- Changes to table {table_name}")
                    changes.append(insert)
                    insert_down = f"delete from public.{table_name} where {name_component}_id = {str(record['id'])};"
                    down_changes.append(insert_down)
                    is_first_change = False

    print("\n🛠️ Here are the changes to be made:\n")
    print("\n".join(changes).strip())

    with open("up_migration.sql", "w") as outfile:
        outfile.write("\n".join(changes).strip())

    with open("down_migration.sql", "w") as outfile_down:
        outfile_down.write("\n".join(down_changes).strip())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="extract file path", required=True)
    args = parser.parse_args()
    file_path = args.input

    main(file_path)
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/requirements.txt
@@ -0,0 +1,3 @@
psycopg2==2.9.3
psycopg2_binary==2.9.5
python-dotenv==1.0.1
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__autonomous_level_engaged_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__autonomous_level_engaged_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__autonomous_unit_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__autonomous_unit_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__inv_da_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__inv_da_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__rpt_road_type_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__rpt_road_type_lkp
schema: public
3 changes: 3 additions & 0 deletions atd-vzd/metadata/databases/default/tables/public_atd_txdot__trauma_centers_lkp.yaml
@@ -0,0 +1,3 @@
table:
name: atd_txdot__trauma_centers_lkp
schema: public
@@ -140,6 +140,7 @@ insert_permissions:
- ownr_state_id
- ownr_zip
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -259,6 +260,7 @@ select_permissions:
- pedalcyclist_action_id
- pedestrian_action_id
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -368,6 +370,7 @@ select_permissions:
- ownr_state_id
- ownr_zip
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
@@ -488,6 +491,7 @@ update_permissions:
- pedalcyclist_action_id
- pedestrian_action_id
- poss_injry_cnt
- rpt_autonomous_level_engaged_id
- sus_serious_injry_cnt
- tot_injry_cnt
- travel_direction
2 changes: 2 additions & 0 deletions atd-vzd/metadata/databases/default/tables/tables.yaml
@@ -22,9 +22,11 @@
- "!include public_atd_txdot__prsn_type_lkp.yaml"
- "!include public_atd_txdot__road_part_lkp.yaml"
- "!include public_atd_txdot__road_type_lkp.yaml"
- "!include public_atd_txdot__rpt_road_type_lkp.yaml"
- "!include public_atd_txdot__rwy_sys_lkp.yaml"
- "!include public_atd_txdot__street_sfx_lkp.yaml"
- "!include public_atd_txdot__traffic_cntl_lkp.yaml"
- "!include public_atd_txdot__trauma_centers_lkp.yaml"
- "!include public_atd_txdot__trvl_dir_lkp.yaml"
- "!include public_atd_txdot__veh_body_styl_lkp.yaml"
- "!include public_atd_txdot__veh_make_lkp.yaml"