-
Notifications
You must be signed in to change notification settings - Fork 4
/
regression.py
67 lines (53 loc) · 2.6 KB
/
regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from pycaret.regression import *
class RegressionAutoML:
    """Convenience wrapper around the ``pycaret.regression`` workflow.

    The methods call the module-level ``setup``, ``compare_models``,
    ``save_model`` and ``tune_model`` functions brought in by
    ``from pycaret.regression import *``.

    NOTE(review): ``setup`` is unpacked into three values and
    ``compare_models`` into two, which presumably relies on a customised
    PyCaret build -- confirm against the installed version.
    """

    def __init__(self):
        pass

    @staticmethod
    def _friendly_type_name(dtype):
        """Map a dtype (or any value) to a display label via its ``str()``.

        Returns ``"Numeric"`` when the text contains ``float`` or ``int``,
        ``"Categorical"`` when it contains ``object``, and ``None`` when no
        rule applies (the caller then keeps the existing value).
        """
        text = str(dtype)
        if "float" in text:
            return "Numeric"
        if "object" in text:
            return "Categorical"
        if "int" in text:
            return "Numeric"
        return None

    # setup regression automl
    def regressionAutoML(self,
                         data,
                         targetName='',
                         idColumnName='',
                         trainSize=0.7,
                         random_seed=1,
                         categoricalFeatures=None,
                         numericFeatures=None,
                         ignoreFeatures=None):
        """Run ``setup`` on *data* and summarise the column types.

        Args:
            data: Table passed to ``setup``; its ``columns[-1]`` is used as
                the target when ``targetName`` is empty.
            targetName: Name of the target column. Empty (or ``None``)
                selects the last column of ``data``.
            idColumnName: Optional column to mark as ``"ID Column"`` in the
                summary. Empty (or ``None``) marks nothing.
            trainSize: Forwarded to ``setup`` as ``train_size``.
            random_seed: Forwarded to ``setup`` as ``session_id``.
            categoricalFeatures: Forwarded as ``categorical_features``;
                ``None`` means an empty list.
            numericFeatures: Forwarded as ``numeric_features``; ``None``
                means an empty list.
            ignoreFeatures: Forwarded as ``ignore_features``; ``None``
                means an empty list.

        Returns:
            A DataFrame with a ``Columns`` column (the former index) and a
            ``Data Type`` column holding ``"label"``, ``"ID Column"``,
            ``"Numeric"``, ``"Categorical"``, or the unchanged value when no
            rule matched.
        """
        # Build fresh lists per call instead of sharing mutable defaults.
        categoricalFeatures = [] if categoricalFeatures is None else categoricalFeatures
        numericFeatures = [] if numericFeatures is None else numericFeatures
        ignoreFeatures = [] if ignoreFeatures is None else ignoreFeatures

        # check if user input target column name
        y_actual_name = targetName if targetName else data.columns[-1]

        s, column_and_datatype, target_column_name = setup(
            data,
            target=y_actual_name,
            session_id=random_seed,
            train_size=trainSize,
            categorical_features=categoricalFeatures,
            numeric_features=numericFeatures,
            ignore_features=ignoreFeatures,
            silent=True,
        )

        # convert label column to value "label"
        column_and_datatype[target_column_name] = "label"
        if idColumnName:
            column_and_datatype[idColumnName] = "ID Column"

        # Series.iteritems() was removed in pandas 2.0; items() works on old
        # and new versions. Iterate over a snapshot because the series is
        # updated inside the loop.
        for index, val in list(column_and_datatype.items()):
            friendly = self._friendly_type_name(val)
            if friendly is not None:
                column_and_datatype[index] = friendly

        # convert to dataframe
        column_and_datatype_dataframe = (
            column_and_datatype
            .to_frame(name='Data Type')
            .reset_index()
            .rename(columns={'index': 'Columns'})
        )
        return column_and_datatype_dataframe

    # train model
    # return best model and table of all models comparison
    def fitRegressionModels(self):
        """Call ``compare_models()`` and return its ``(best, results)`` pair."""
        best, results = compare_models()
        return best, results

    def save(self, best):
        """Persist *best* via ``save_model`` under the name ``regression_model``."""
        save_model(best, 'regression_model')

    def tune(self, model):
        """Return the result of ``tune_model(model)``."""
        tuned_dt = tune_model(model)
        return tuned_dt