-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
48 lines (40 loc) · 1.31 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# End-to-End Diabetes Machine Learning Pipeline
# Author: Oguz Erdogan
# www.github.com/oguzerdo
import pandas as pd
from scripts.preprocess import *
from scripts.train import *
from contextlib import contextmanager
import time
import joblib
@contextmanager
def timer(title):
    """Print how long the enclosed ``with`` block took to run.

    Runs shorter than 60 seconds are reported in whole seconds
    (``"<title> - done in <n>s"``); longer runs are reported as whole
    minutes plus the remaining whole seconds. Every report is followed by a
    line holding a single space, which acts as a visual separator.

    Nothing is printed when the block raises: the exception surfaces at the
    ``yield`` and the reporting code after it is skipped.

    Args:
        title: Label shown in the printed message.

    Yields:
        None. The context manager exposes no value to the ``with`` block.
    """
    # perf_counter is monotonic, so a system clock adjustment during a long
    # run cannot distort (or make negative) the measured duration.
    started = time.perf_counter()
    yield
    # Read the clock once so the branch taken and the value printed agree.
    elapsed = time.perf_counter() - started
    if elapsed < 60:
        print("{} - done in {:.0f}s".format(title, elapsed))
    else:
        # divmod yields floats here; cast both so the message reads
        # "2 min. 5 second" rather than "2.0 min. 5 second".
        minutes, seconds = divmod(elapsed, 60)
        print(f"{title} is finished in {int(minutes)} min. {int(seconds)} second")
    print(" ")
def main(debug=True, tuning=True):
    """Run the pipeline: read the dataset, preprocess it, train and save a model.

    Every stage runs inside ``timer`` so its duration is printed, and the
    whole run is additionally timed under the "Pipeline" label.

    Args:
        debug: Forwarded unchanged as the first positional argument of
            ``train_model``.
        tuning: Forwarded unchanged as the second positional argument of
            ``train_model``.

    Returns:
        None. The object returned by ``train_model`` is written with
        ``joblib.dump`` to ``outputs/final_model.pkl``.
    """
    # Function-scope import so this block does not depend on the ordering of
    # the module-level imports (which include star imports).
    import os

    model_path = 'outputs/final_model.pkl'

    with timer("Pipeline"):
        print("Pipeline started")

        with timer("Reading Dataset"):
            print("Reading Dataset Started")
            df = pd.read_csv(DATA_PATH)

        with timer("Data Preprocessing"):
            print("Data Preprocessing Started")
            # NOTE(review): the preprocessed frame is never handed to
            # train_model below. Confirm that train_model obtains its data
            # some other way and that this result is not meant to be passed.
            df = data_preprocessing(df)

        with timer("Training"):
            print("Training Started")
            # Create the output directory up front: otherwise a missing
            # "outputs/" only surfaces as FileNotFoundError in joblib.dump,
            # after the whole training run has already been spent.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            final_model = train_model(debug, tuning)
            joblib.dump(final_model, model_path)
if __name__ == "__main__":
    # get_namespace is not defined in this file (it arrives via the star
    # imports); the object it returns must expose `debug` and `tuning`.
    # Presumably these are parsed command-line flags - TODO confirm.
    cli_options = get_namespace()

    # Outer timer for the whole script run; main() also times itself under
    # the "Pipeline" label.
    with timer("Full model run"):
        main(
            debug=cli_options.debug,
            tuning=cli_options.tuning,
        )