Skip to content

Commit

Permalink
Migrate sql usage of location service to db gateway (#29)
Browse files Browse the repository at this point in the history
* db gateway now creates location service

* add etl for location_service

* remove sqlalchemy and use raw sql queries

* bug fixes, remove fkey rule for etl purposes

* Remove print stmt
  • Loading branch information
ryan-lam authored Nov 5, 2023
1 parent a39af69 commit 6bc3c9b
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 398 deletions.
2 changes: 1 addition & 1 deletion db_gateway/src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def check_null(value):


def reset_tables():
from models import RouteModel, Weather
from models import RouteModel, Weather, LocationService

Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)
10 changes: 9 additions & 1 deletion db_gateway/src/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ class RouteModel(Base):
gpx_elapsed_dist_m = Column(Float)
geopy_elapsed_dist_m = Column(Float)
geopy_dist_from_last_m = Column(Float)
weather_id = Column(Integer, ForeignKey("weather.id"))
weather_id = Column(Integer)


class LocationService(Base):
    """ORM model for the ``location_service`` table.

    Each row stores a point both as separate latitude/longitude floats and
    as a PostGIS-style ``POINT`` geometry.
    """

    __tablename__ = "location_service"
    # Integer primary key for the row.
    id = Column(Integer, primary_key=True)
    # Latitude and longitude stored as plain floats.
    lat = Column(Float)
    lon = Column(Float)
    # The same location as a POINT geometry in SRID 4326 (WGS 84).
    geo = Column(Geometry(geometry_type="POINT", srid=4326))


class Weather(Base):
Expand Down
140 changes: 0 additions & 140 deletions db_gateway/uw_sample_gpx.csv

This file was deleted.

52 changes: 42 additions & 10 deletions etl/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from etl_routemodel.etl_routemodel import main as run_routemodel_etl
from etl_weather.etl_weather import main as run_weather_etl
from etl_speed_limit.etl_speed_limit import main as run_speed_limit_etl
from etl_location_service.etl_location_service import main as run_location_service_etl
from termcolor import colored


Expand Down Expand Up @@ -60,23 +61,27 @@ def cmd_routemodel(db_user, db_password, db_host, db_name):
print(colored("routemodel ETL success", "green"))


def cmd_streetname_speedlimit(db_user, db_password, db_host, db_name):
def cmd_location_service(db_user, db_password, db_host, db_name):
answers = questionary.form(
bingmaps_api_key=questionary.password("Bing Maps API key", default=""),
csv_filepath=questionary.path(
"Path to data (CSV file)",
default="",
validate=lambda p: validate_path(p, ".csv"),
),
confirm=questionary.confirm(
"Confirm street_name/speed_limit ETL operation",
"Confirm routemodel ETL operation",
default=False,
auto_enter=False,
),
).ask()
bingmaps_api_key = answers["bingmaps_api_key"]
csv_filepath = answers["csv_filepath"]
confirm = answers["confirm"]

if not confirm:
print(colored("streetname/speedlimit ETL cancelled", "red"))
print(colored("location_service ETL cancelled", "red"))
else:
run_speed_limit_etl(db_user, db_password, db_host, db_name, bingmaps_api_key)
print(colored("streetname/speedlimit ETL success", "green"))
run_location_service_etl(csv_filepath, db_user, db_password, db_host, db_name)
print(colored("location_service ETL success", "green"))


def cmd_weather(db_user, db_password, db_host, db_name):
Expand Down Expand Up @@ -111,6 +116,25 @@ def cmd_weather(db_user, db_password, db_host, db_name):
print(colored("weather ETL success", "green"))


def cmd_streetname_speedlimit(db_user, db_password, db_host, db_name):
    """Prompt for a Bing Maps API key and run the street-name/speed-limit ETL.

    The user is asked for the API key and an explicit confirmation. If the
    confirmation is declined, a cancellation message is printed and nothing
    is run; otherwise the speed-limit ETL is started with the given
    database credentials and API key.

    Args:
        db_user: Database user name passed through to the ETL.
        db_password: Database password passed through to the ETL.
        db_host: Database host passed through to the ETL.
        db_name: Database name passed through to the ETL.
    """
    prompt_form = questionary.form(
        bingmaps_api_key=questionary.password("Bing Maps API key", default=""),
        confirm=questionary.confirm(
            "Confirm street_name/speed_limit ETL operation",
            default=False,
            auto_enter=False,
        ),
    )
    responses = prompt_form.ask()
    api_key = responses["bingmaps_api_key"]
    confirmed = responses["confirm"]

    # Guard clause: bail out early when the user declines.
    if not confirmed:
        print(colored("streetname/speedlimit ETL cancelled", "red"))
        return

    run_speed_limit_etl(db_user, db_password, db_host, db_name, api_key)
    print(colored("streetname/speedlimit ETL success", "green"))


def cmd_drop_tables(db_user, db_password, db_host, db_name):
answers = questionary.form(
table_names=questionary.checkbox(
Expand Down Expand Up @@ -146,7 +170,13 @@ def cmd_drop_tables(db_user, db_password, db_host, db_name):
)
etl_name = questionary.select(
"Select ETL operation",
choices=["routemodel", "weather", "speed_limit/street_names", "drop_tables"],
choices=[
"routemodel",
"location_service",
"weather",
"speed_limit/street_names",
"drop_tables",
],
).ask()

auth, db_user, db_password, db_host, db_name = validate_db_creds()
Expand All @@ -156,9 +186,11 @@ def cmd_drop_tables(db_user, db_password, db_host, db_name):

if etl_name == "routemodel":
cmd_routemodel(db_user, db_password, db_host, db_name)
elif etl_name == "location_service":
cmd_location_service(db_user, db_password, db_host, db_name)
elif etl_name == "weather":
cmd_weather(db_user, db_password, db_host, db_name)
elif etl_name == "drop_tables":
cmd_drop_tables(db_user, db_password, db_host, db_name)
elif etl_name == "speed_limit/street_names":
cmd_streetname_speedlimit(db_user, db_password, db_host, db_name)
elif etl_name == "drop_tables":
cmd_drop_tables(db_user, db_password, db_host, db_name)
51 changes: 51 additions & 0 deletions etl/etl_location_service/etl_location_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
from geoalchemy2 import Geometry
from sqlalchemy import create_engine


def seed_from_csv(csv_filepath, db_user, db_password, db_host, db_name):
    """Load a CSV of points and write it to the ``location_service`` table.

    The CSV must provide ``lat``, ``lon`` and ``geo`` columns; any other
    columns are dropped. Rows receive a 1-based ``id`` index and the
    ``public.location_service`` table is replaced with the new contents.

    Args:
        csv_filepath: Path to the CSV file to load.
        db_user: PostgreSQL user name.
        db_password: Password for ``db_user``.
        db_host: Database host, as it should appear in the connection URL.
        db_name: Name of the target database.

    Raises:
        FileNotFoundError: If ``csv_filepath`` is not an existing file.
        SystemError: If the row count reported by the insert differs from
            the number of rows in the dataframe.
    """
    file = Path(csv_filepath)
    if not file.is_file():
        raise FileNotFoundError("No file exists at the location specified")

    # NOTE: an earlier NaN -> None conversion at this point discarded its
    # result, so it never affected the data that was written. It has been
    # removed; the inserted values are unchanged.
    gdf = gpd.GeoDataFrame(pd.read_csv(file))
    gdf = gdf[["lat", "lon", "geo"]]
    # The dataframe index becomes the table's "id" column, starting at 1.
    gdf.index.name = "id"
    gdf.index += 1

    engine = create_engine(
        f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}/{db_name}"
    )
    try:
        response = gdf.to_sql(
            name="location_service",
            con=engine,
            schema="public",
            if_exists="replace",
            index=True,
            method="multi",
            dtype={"geo": Geometry("POINT", srid=4326)},
        )
    finally:
        # Release pooled connections even when the insert fails.
        engine.dispose()

    if gdf.shape[0] != response:
        raise SystemError("dataframe insertion failed")
    print("location_service dataframe insertion success")


def main(csv_filepath, db_user, db_password, db_host, db_name):
    """Entry point for the location_service ETL.

    Prints a progress line, then seeds the ``location_service`` table from
    the CSV at ``csv_filepath`` using the given database credentials.
    """
    print("1) Parsing and seeding routemodel into location_service...")
    seed_from_csv(
        csv_filepath=csv_filepath,
        db_user=db_user,
        db_password=db_password,
        db_host=db_host,
        db_name=db_name,
    )


if __name__ == "__main__":
    # Placeholder values for running this module directly; fill them in
    # before use.
    main(
        csv_filepath="",
        db_user="",
        db_password="",
        db_host="",
        db_name="",
    )
6 changes: 6 additions & 0 deletions location_service/.env.sample
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
DATABASE_URI="postgresql+psycopg2://user:password@host:port/database"
DB_HOST=""
DB_NAME=""
DB_USER=""
DB_PASSWORD=""
DB_PORT=""
REQUIRE_AUTH=""
AUTH_KEY=""
9 changes: 7 additions & 2 deletions location_service/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
FROM python:3.11.4-buster
RUN pip install --upgrade cython pip

ENV DATABASE_URI=""
ENV DB_HOST=""
ENV DB_NAME=""
ENV DB_USER=""
ENV DB_PASSWORD=""
ENV DB_PORT=""
ENV REQUIRE_AUTH=""
ENV AUTH_KEY=""

WORKDIR /usr/location_service
Expand All @@ -13,4 +18,4 @@ COPY . .


EXPOSE 5000
CMD [ "python", "src/main.py", "--create-table", "true", "--seed-filename", "./uw_sample_gpx.csv" ]
CMD [ "python", "src/main.py" ]
20 changes: 4 additions & 16 deletions location_service/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,10 @@
blinker==1.6.2
click==8.1.5
blinker==1.7.0
click==8.1.7
colorama==0.4.6
Flask==2.3.2
Flask-SQLAlchemy==3.0.5
GeoAlchemy2==0.14.0
greenlet==2.0.2
Flask==3.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.3
numpy==1.25.1
packaging==23.1
pandas==2.0.3
psycopg2==2.9.6
python-dateutil==2.8.2
psycopg2==2.9.9
python-dotenv==1.0.0
pytz==2023.3
six==1.16.0
SQLAlchemy==2.0.18
typing_extensions==4.7.1
tzdata==2023.3
Werkzeug==3.0.1
Loading

0 comments on commit 6bc3c9b

Please sign in to comment.