-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathRandomForrestFPLModel.py
99 lines (77 loc) · 3.83 KB
/
RandomForrestFPLModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
import math, datetime
import numpy as np
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from matplotlib import style
import pickle
import os
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
# Use the ggplot look for any matplotlib output produced by this script.
style.use('ggplot')

# Gameweek being predicted; it selects the per-round input and output folders.
thisRound = 29

# Root folder that holds the training CSVs and the per-round subfolders.
dataPath = "./prediction/Gameweeks/"

# Folder that receives this round's random-forest artefacts (CV results,
# pickled model, metrics file, predictions). Created on demand.
savePath = "./prediction/Gameweeks/{}/prediction/RandomForest/".format(thisRound)
os.makedirs(savePath, exist_ok=True)
# Base training set stored as a single CSV directly under dataPath.
trainingData_df = pd.read_csv(dataPath + "trainingData.csv")

# Per-round training files for rounds 1 .. thisRound - 1.
# All files are read first and concatenated once: calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration, and seeding the
# accumulation with an empty DataFrame is deprecated in recent pandas.
_round_frames = [
    pd.read_csv(dataPath + "2020 Training Data/round" + str(r) + "Training.csv")
    for r in range(1, thisRound)
]
# pd.concat raises on an empty list, so keep the original "empty frame" result
# when there are no earlier rounds (thisRound == 1).
trainingData_year_df = (
    pd.concat(_round_frames, ignore_index=True) if _round_frames else pd.DataFrame()
)

# Rows to score for the round being predicted.
predictData_df = pd.read_csv(dataPath + str(thisRound) + "/next_games/predictionData.csv")
# Keep the columns that describe each prediction row before they are dropped
# from predictData_df further down; the predicted points are attached to this
# frame later. `.copy()` makes save_df independent of predictData_df.
save_df = predictData_df[
    ['player_name', 'player_team', 'element_type', 'value', 'element', 'round']
].copy()
# Columns removed from every dataset before it is handed to the model.
_NON_FEATURE_COLUMNS = [
    'element', 'opponent_team', 'player_name', 'web_name', 'player_team',
    'understat_id', 'h_team', 'a_team', 'round',
]


def _numeric_features(frame):
    """Return *frame* without the non-feature columns, coerced to numeric.

    `pd.to_numeric` is applied column by column and raises if a remaining
    column cannot be parsed as a number.
    """
    return frame.drop(columns=_NON_FEATURE_COLUMNS).apply(pd.to_numeric)


trainingData_df = _numeric_features(trainingData_df)
trainingData_year_df = _numeric_features(trainingData_year_df)
predictData_df = _numeric_features(predictData_df)

# `pop` removes the 'label' column from each training frame and returns it, so
# the targets are collected and the feature frames are left label-free in one
# step. Base-set rows come first, then the per-round rows, in both results.
label_df = pd.concat([trainingData_df.pop('label'), trainingData_year_df.pop('label')])
trainingData_df = pd.concat([trainingData_df, trainingData_year_df])
# Plain NumPy views of the feature matrix and the target vector.
X, y = np.array(trainingData_df), np.array(label_df)

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=29,
)
# Original baseline, kept for reference: a default forest without any search.
# clf = RandomForestRegressor(random_state=5).fit(X_train, y_train)

# Cross-validation scheme used by the grid search.
splits = 2
folds = KFold(n_splits=splits)

# Single-point grid: every list has one value, so exactly one configuration is
# evaluated (once per fold).
hyper_params = {
    'n_estimators': [200],
    # 'auto' was removed from scikit-learn (it is rejected as an invalid
    # parameter in current releases). For regressors it meant "consider all
    # features", which is what the float 1.0 expresses on old and new versions.
    'max_features': [1.0],
    'bootstrap': [True],
    'verbose': [3],
    'warm_start': [True],
    'max_depth': [10],
}
clf = GridSearchCV(
    estimator=RandomForestRegressor(),
    param_grid=hyper_params,
    scoring='neg_median_absolute_error',
    cv=folds,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,
)
clf.fit(X_train, y_train)

# Persist the cross-validation table next to the other outputs of this round.
cv_results = pd.DataFrame(clf.cv_results_)
cv_results.to_csv(savePath + "bestParam.csv", encoding='utf-8', index=False)

# Save the fitted search object so predictions can be reproduced without
# retraining.
with open(savePath + 'RF.pickle', 'wb') as f:
    pickle.dump(clf, f)

# To reuse a previously saved model instead of retraining, load it back
# (only unpickle files you created yourself; pickle can execute arbitrary code):
# with open(savePath + 'RF.pickle', 'rb') as pickle_in:
#     clf = pickle.load(pickle_in)
# Score the fitted model on the held-out rows.
y_test_pred = clf.predict(X_test)

# "accuracy" is the R^2 score expressed as a percentage, rounded to 2 decimals.
accuracy = round(r2_score(y_test, y_test_pred) * 100, 2)
print(accuracy)

# RMSE is computed as sqrt(MSE) rather than via the `squared=False` keyword,
# which newer scikit-learn releases no longer accept.
rmse = math.sqrt(mean_squared_error(y_test, y_test_pred))
mae = mean_absolute_error(y_test, y_test_pred)

# The metrics file is named after the accuracy value; the context manager
# guarantees the handle is closed even if a write fails.
with open(savePath + str(accuracy) + ".txt", "w") as f:
    f.write("RMSE : " + str(rmse) + "\n")
    f.write("MAE : " + str(mae))
# Predict points for the upcoming fixtures.
# NOTE(review): this assumes predictionData.csv yields exactly the same feature
# columns, in the same order, as the training frames (and no 'label' column),
# because the model only sees a bare array — confirm against the data files.
X = np.array(predictData_df)
forecast = clf.predict(X)
save_df['points'] = forecast

# Sum the predicted points over rows that share the same identifying columns,
# giving one total per group. The aggregation is passed by name ("sum") instead
# of as np.sum, which recent pandas versions warn about.
save_df = pd.pivot_table(
    save_df,
    values=['points'],
    index=['player_name', 'player_team', 'element_type', 'element', 'round', 'value'],
    aggfunc="sum",
).reset_index()

# Order by round, then by highest predicted points first, and export.
df = save_df.sort_values(["round", "points"], ascending=[True, False])
df.to_csv(savePath + "PredictRF.csv", encoding='utf-8', index=False)