-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_etl_and_model.py
106 lines (87 loc) · 3.01 KB
/
run_etl_and_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import progressbar as pb
import settings
from modelling import model_functions
from utils.database import Database
from utils.utils import load_data_dict, load_yaml
# Number of pipeline stages; `steps_iter` hands out the 1-based index of the
# stage that has just finished so it can be passed to the progress bar.
STEPS = 13
steps_iter = iter(range(1, STEPS + 1))

logger = settings.configure_logger()

# Every working directory is resolved relative to the project root.
ROOT_FOLDER = settings.get_root_dir()
DATA_FOLDER, SQL_FOLDER, RESULTS_FOLDER = [
    os.path.join(ROOT_FOLDER, sub_dir)
    for sub_dir in ('data/', 'sql/', 'results/')
]

# YAML file passed to `load_data_dict` by the pipeline below.
data_config = os.path.join(ROOT_FOLDER, 'config/base/data_files.yaml')

# Database handle obtained through the class-level accessor.
database = Database.get_instance()
# Run the ETL and model-preparation pipeline one stage at a time. After each
# stage the progress bar is advanced to the next value from `steps_iter`.
# The bar's maximum is taken from STEPS so it always matches the iterator.
with pb.ProgressBar(max_value=STEPS) as bar:
    # Stage 1: run the schema-creation SQL script.
    logger.info("Creating schemas")
    database.execute_sql(
        os.path.join(SQL_FOLDER, 'create_schemas.sql'),
        read_file=True
    )
    bar.update(next(steps_iter))

    # Stage 2: run the table-creation SQL script.
    logger.info("Creating tables")
    database.execute_sql(
        os.path.join(SQL_FOLDER, 'create_tables.sql'),
        read_file=True
    )
    bar.update(next(steps_iter))

    # Stage 3: read the data-file configuration; it yields the text-file
    # mapping, the GIS-file mapping and the OSM file used by later stages.
    text_dict, gis_dict, osm_file = load_data_dict(data_config)
    bar.update(next(steps_iter))

    # Stage 4: load the text files.
    logger.info("Loading text files to RAW")
    database.load_text(DATA_FOLDER, text_dict)
    bar.update(next(steps_iter))

    # Stage 5: load the shapefiles.
    logger.info("Loading shapefiles to GIS")
    database.load_gis(DATA_FOLDER, gis_dict)
    bar.update(next(steps_iter))

    # Stage 6: load the OSM data, passing along the OSM update SQL script.
    logger.info("Loading OSM data to RAW")
    database.load_osm_to_db(
        DATA_FOLDER,
        osm_file,
        os.path.join(SQL_FOLDER, 'update_osm_tables.sql'),
    )
    bar.update(next(steps_iter))

    # Stage 7: run the data-cleaning SQL script.
    logger.info("Cleaning data")
    database.execute_sql(
        os.path.join(SQL_FOLDER, 'clean_data.sql'),
        read_file=True
    )
    bar.update(next(steps_iter))

    # Stage 8: run the SQL script that builds the semantic layer.
    logger.info("Entitizing data")
    database.execute_sql(
        os.path.join(SQL_FOLDER, 'create_semantic.sql'),
        read_file=True
    )
    bar.update(next(steps_iter))

    # Stage 9: load the model configuration and pull out the hyper-parameters.
    model_config = os.path.join(ROOT_FOLDER, 'config/base/model_config.yaml')
    params = load_yaml(model_config)
    hyper_params = params['hyper_params']
    logger.info('Model parameters loaded')
    bar.update(next(steps_iter))

    # Stage 10: create the timestamps from the configured time definitions.
    logger.info('Creating timestamps')
    model_functions.create_timestamps(
        params['time_defs'],
        params['time_strata'],
        n_timepoints=hyper_params['n_timepoint'],
    )
    bar.update(next(steps_iter))

    # Stage 11: select the K nearest points of interest.
    logger.info('Selecting K nearest Points of Interest for each OA')
    model_functions.create_k_poi(
        SQL_FOLDER,
        k_poi=hyper_params['k_POI'],
        poi_dict=params['points_of_interest'],
    )
    bar.update(next(steps_iter))

    # Stage 12: build the trip combinations, then run the OTP trips SQL script.
    logger.info('Creating possible combinations of trips for OTP input')
    model_functions.create_trips(SQL_FOLDER)
    database.execute_sql(
        os.path.join(SQL_FOLDER, 'create_model_otp_trips.sql'),
        read_file=True
    )
    bar.update(next(steps_iter))

    # Stage 13: export the model table to a CSV file in the results folder.
    file_name = 'otp_trips'
    logger.info(f'Storing model.{file_name} to {file_name}.csv')
    database.copy_table_to_csv(
        f'model.{file_name}',
        os.path.join(RESULTS_FOLDER, f'{file_name}.csv'),
    )
    # Report the final stage too, so all STEPS stages are accounted for.
    bar.update(next(steps_iter))