stock_trader_with_trend.py
import statistics
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.tensorboard import SummaryWriter

from dqn_agent import Agent

# state layout: price-difference window + portfolio value + cash in hand + trend flag
window_size = 10
portfolio_size = 1
investment_size = 1
trend_size = 1
input_size = window_size + portfolio_size + investment_size + trend_size

tb = SummaryWriter()  # initialize the TensorBoard writer

agent = Agent(state_size=input_size, action_size=3, seed=0)
agent.qnetwork_local.load_state_dict(torch.load('checkpoints/checkpoint_qnetwork_local_WMT_V5_.pth'))
agent.qnetwork_target.load_state_dict(torch.load('checkpoints/checkpoint_qnetwork_target_WMT_V5_.pth'))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
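# Note: the Agent class from dqn_agent is assumed (based on how it is used below) to expose
# act(state[, eps]) -> (action, Q-value), step(state, action, reward, next_state, done), and
# qnetwork_local / qnetwork_target networks with fully connected layers fc1..fc5.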
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def stocks_price_format(n):
    # format a dollar amount with an explicit sign and two decimal places
    if n < 0:
        return "- $ {0:.2f}".format(abs(n))
    else:
        return "$ {0:.2f}".format(abs(n))
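# Illustrative only: with the two-decimal format above, stocks_price_format(-12.5) gives "- $ 12.50"
# and stocks_price_format(1203.456) gives "$ 1203.46".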
def state_creator(data_gp, timestep, window_size, inventory_gp, investment, episode, trend_gp):
    gp_asset = len(inventory_gp) * data_gp[timestep]  # portfolio value at the current timestep
    state = []
    for i in range(timestep - window_size + 1, timestep + 1):
        state.append(data_gp[i] - data_gp[i - 1])  # one-day price differences over the window
    state.append(0.0001 * gp_asset)    # scaled portfolio value
    state.append(0.0001 * investment)  # scaled cash in hand
    state.append(trend_gp[timestep])   # trend indicator for the current day
    state = np.array([state])          # shape (1, window_size + 3)
    # state = sigmoid(state)           # optional squashing, currently disabled
    return state
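# Illustrative call (values assumed for the sketch): with window_size = 10, an empty inventory and
# 10,000 in cash, state_creator(data_gp, 10, 10, [], 10000, 1, trend_gp) returns a (1, 13) array of
# ten price differences, 0.0 (scaled portfolio), 1.0 (scaled cash) and trend_gp[10].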
# load the training data
dataset_gp = pd.read_csv('datasets/WMT Historical Data 2018.csv')
data_gp = list(dataset_gp['Price'])
trend_gp = list(dataset_gp['trend'])
data_samples = len(data_gp) - 1
scores = []

# load the validation data
dataset_gp_evaluation = pd.read_csv('datasets/WMT Historical Data 2019.csv')
data_gp_evaluation = list(dataset_gp_evaluation['Price'])
trend_gp_evaluation = list(dataset_gp_evaluation['trend'])

# running averages of rewards and Q-values
rewards_avg = deque(maxlen=200)                  # average reward per episode
state_action_value_averages = deque(maxlen=200)  # average Q-value per episode
# evaluation loop
def evaluation():
    # set up the parameters
    data_samples = len(data_gp_evaluation) - 1
    inventory_gp_evaluation = []
    total_profit = 0
    investment_evaluation = 10000
    episode = 1
    state = state_creator(data_gp_evaluation, 10, window_size, inventory_gp_evaluation, investment_evaluation, episode, trend_gp_evaluation)
    for t in range(10, data_samples - 1):
        action, state_action_value = agent.act(state)  # action and its corresponding Q-value
        if action == 1 and int(investment_evaluation / data_gp_evaluation[t]) > 0:  # buy
            no_buy = int(investment_evaluation / data_gp_evaluation[t])
            for i in range(no_buy):
                investment_evaluation -= data_gp_evaluation[t]
                inventory_gp_evaluation.append(data_gp_evaluation[t])
            print("AI Trader bought: ", stocks_price_format(no_buy * data_gp_evaluation[t]))
        if action == 2 and len(inventory_gp_evaluation) > 0:  # sell
            buy_prices_gp = []
            for i in range(len(inventory_gp_evaluation)):
                buy_prices_gp.append(inventory_gp_evaluation[i])
            buy_price = sum(buy_prices_gp)  # total purchase cost of the held shares
            total_profit += (len(inventory_gp_evaluation) * data_gp_evaluation[t]) - buy_price
            profit = (len(inventory_gp_evaluation) * data_gp_evaluation[t]) - buy_price
            investment_evaluation = investment_evaluation + (len(inventory_gp_evaluation) * data_gp_evaluation[t])  # cash in hand after selling
            print("AI Trader sold gp: ", stocks_price_format(len(inventory_gp_evaluation) * data_gp_evaluation[t]), " Profit: " + stocks_price_format(profit))
            for i in range(len(inventory_gp_evaluation)):
                inventory_gp_evaluation.pop(0)  # empty the inventory after selling everything
        if action == 0:  # hold
            print("AI Trader is holding........")
        next_state = state_creator(data_gp_evaluation, t + 1, window_size, inventory_gp_evaluation, investment_evaluation, episode, trend_gp_evaluation)
        if investment_evaluation <= 0 and len(inventory_gp_evaluation) == 0:  # check for bankruptcy
            print("########################")
            print("TOTAL PROFIT: {}".format(total_profit))
            print("AI Trader went bankrupt")
            scores.append(total_profit)
            print("########################")
            break  # end the session on bankruptcy
        if t == data_samples - 11:
            done = True   # kept for symmetry with the training loop; not used during evaluation
        else:
            done = False
        state = next_state  # move to the next state
    print("########################")
    print("TOTAL VALIDATION PROFIT: {}".format(total_profit))
    print("########################")
def dqn(n_episodes=5000, max_t=len(data_gp) - 20, eps_start=1.0, eps_end=0.001, eps_decay=0.995):
    """Deep Q-Learning.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    sell_date = deque(maxlen=1)
    eps = eps_start  # initialize epsilon
    for episode in range(1, n_episodes + 1):
        print("Episode: {}/{}".format(episode, n_episodes))
        investment = 10000
        inventory_gp = []
        state_action_values = []
        rewards = []
        state = state_creator(data_gp, 10, window_size, inventory_gp, investment, episode, trend_gp)
        total_profit = 0
        total_reward = 0
        for t in range(10, max_t):
            action, state_action_value = agent.act(state, eps)  # action and its corresponding Q-value
            state_action_values.append(state_action_value)
            reward = -100  # default penalty for an action that cannot be executed (buy with no cash, sell with no shares)
            if action == 1 and int(investment / data_gp[t]) > 0:  # buy
                no_buy = int(investment / data_gp[t])
                for i in range(no_buy):
                    investment -= data_gp[t]
                    inventory_gp.append(data_gp[t])
                fifteen_days_min = min(data_gp[t + 1:t + 16])
                if data_gp[t] < fifteen_days_min:
                    reward = 1  # bought below the minimum of the next 15 days
                else:
                    reward = -1
                rewards.append(reward)
                print("AI Trader bought: ", stocks_price_format(no_buy * data_gp[t]), " Reward: " + stocks_price_format(reward))
            if action == 2 and len(inventory_gp) > 0:  # sell
                buy_prices_gp = []
                for i in range(len(inventory_gp)):
                    buy_prices_gp.append(inventory_gp[i])
                buy_price = sum(buy_prices_gp)  # total purchase cost of the held shares
                fifteen_days_max = max(data_gp[t + 1:t + 16])
                if data_gp[t] > fifteen_days_max:
                    reward = 1  # sold above the maximum of the next 15 days
                else:
                    reward = -1
                total_profit += (len(inventory_gp) * data_gp[t]) - buy_price
                profit = (len(inventory_gp) * data_gp[t]) - buy_price
                investment = investment + (len(inventory_gp) * data_gp[t])  # cash in hand after selling
                rewards.append(reward)
                sell_date.append(data_gp[t])
                print("AI Trader sold: ", stocks_price_format(len(inventory_gp) * data_gp[t]), " Profit: " + stocks_price_format(profit), " Reward: ", stocks_price_format(reward))
                for i in range(len(inventory_gp)):
                    inventory_gp.pop(0)  # empty the inventory after selling everything
            if action == 0:  # hold
                reward = 0
                print("AI Trader is holding.................", "Reward: ", stocks_price_format(reward))
            next_state = state_creator(data_gp, t + 1, window_size, inventory_gp, investment, episode, trend_gp)
            if investment <= 0 and len(inventory_gp) == 0:  # check for bankruptcy
                reward = -1000
                done = True
                agent.step(state, action, reward, next_state, done)
                print("########################")
                print("TOTAL PROFIT: {}".format(total_profit))
                print("AI Trader went bankrupt")
                scores.append(total_profit)
                print("########################")
                break  # end the episode on bankruptcy
            if t == max_t - 1:
                done = True
            else:
                done = False
            agent.step(state, action, reward, next_state, done)
            state = next_state
            if done:
                print("########################")
                print("TOTAL PROFIT: {}".format(total_profit))
                # scores.append(total_profit)
                rewards_avg.append(statistics.mean(rewards))
                state_action_value_averages.append(statistics.mean(state_action_values))
                tb.add_scalar("average reward", statistics.mean(rewards), episode)  # log the average reward
                tb.add_scalar("average Q-value", statistics.mean(state_action_values), episode)  # log the average Q-value
                tb.add_histogram("local network fc1 layer bias", agent.qnetwork_local.fc1.bias, episode)  # layer biases
                tb.add_histogram("local network fc2 layer bias", agent.qnetwork_local.fc2.bias, episode)
                tb.add_histogram("local network fc3 layer bias", agent.qnetwork_local.fc3.bias, episode)
                tb.add_histogram("local network fc4 layer bias", agent.qnetwork_local.fc4.bias, episode)
                tb.add_histogram("local network fc5 layer bias", agent.qnetwork_local.fc5.bias, episode)
                tb.add_histogram("local network fc1 layer weight", agent.qnetwork_local.fc1.weight, episode)  # layer weights
                tb.add_histogram("local network fc2 layer weight", agent.qnetwork_local.fc2.weight, episode)
                tb.add_histogram("local network fc3 layer weight", agent.qnetwork_local.fc3.weight, episode)
                tb.add_histogram("local network fc4 layer weight", agent.qnetwork_local.fc4.weight, episode)
                tb.add_histogram("local network fc5 layer weight", agent.qnetwork_local.fc5.weight, episode)
                print("########################")
        evaluation()
        eps = max(eps_end, eps_decay * eps)  # decay epsilon
        # save the model weights
        if episode == 100:
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint_qnetwork_local_WMT_V5_.pth')
            torch.save(agent.qnetwork_target.state_dict(), 'checkpoint_qnetwork_target_WMT_V5_.pth')
    return rewards_avg
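# For a quick sanity check one could run a shorter session, e.g. scores = dqn(n_episodes=10);
# the call below uses the defaults (5000 episodes).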
scores = dqn()  # dqn() returns rewards_avg, the per-episode average rewards
tb.close()  # close the TensorBoard writer

################################################################################################
# save the model weights
torch.save(agent.qnetwork_local.state_dict(), 'checkpoint_qnetwork_local_WMT_V5_.pth')
torch.save(agent.qnetwork_target.state_dict(), 'checkpoint_qnetwork_target_WMT_V5_.pth')
################################################################################################

# plot the average reward for each episode
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(rewards_avg)), rewards_avg)
plt.ylabel('average rewards of WMT')
plt.xlabel('Episode #')
plt.show()

# plot the average Q-values for each episode
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(state_action_value_averages)), state_action_value_averages)
plt.ylabel('average Q-values of WMT')
plt.xlabel('Episode #')
plt.show()

# test the trained agent on the held-out 2019 data
agent = Agent(state_size=input_size, action_size=3, seed=0)
agent.qnetwork_local.load_state_dict(torch.load('checkpoint_qnetwork_local_WMT_V5_.pth'))
agent.qnetwork_target.load_state_dict(torch.load('checkpoint_qnetwork_target_WMT_V5_.pth'))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# import the test data
dataset_gp_test = pd.read_csv('datasets/WMT Historical Data 2019.csv')
data_gp_test = list(dataset_gp_test['Price'])
trend_gp_test = list(dataset_gp_test['trend'])

# buy/sell markers for the plot below
timesteps_sells = []
sells = []
timesteps_buys = []
buys = []


# test loop
def test():
    # set up the parameters
    data_samples = len(data_gp_test) - 1
    inventory_gp_test = []
    total_profit = 0
    investment_test = 10000
    episode = 1
    state = state_creator(data_gp_test, 10, window_size, inventory_gp_test, investment_test, episode, trend_gp_test)
    for t in range(10, data_samples - 1):
        action, state_action_value = agent.act(state)
        if action == 1 and int(investment_test / data_gp_test[t]) > 0:  # buy
            no_buy = int(investment_test / data_gp_test[t])
            for i in range(no_buy):
                investment_test -= data_gp_test[t]
                inventory_gp_test.append(data_gp_test[t])
            timesteps_buys.append(t - 10)
            buys.append(data_gp_test[t])
            print("AI Trader bought: ", stocks_price_format(no_buy * data_gp_test[t]), "Investment= ", stocks_price_format(investment_test))
        if action == 2 and len(inventory_gp_test) > 0:  # sell
            buy_prices_gp = []
            for i in range(len(inventory_gp_test)):
                buy_prices_gp.append(inventory_gp_test[i])
            buy_price = sum(buy_prices_gp)  # total purchase cost of the held shares
            total_profit += (len(inventory_gp_test) * data_gp_test[t]) - buy_price
            profit = (len(inventory_gp_test) * data_gp_test[t]) - buy_price
            investment_test = investment_test + (len(inventory_gp_test) * data_gp_test[t])  # cash in hand after selling
            timesteps_sells.append(t - 10)
            sells.append(data_gp_test[t])
            print("AI Trader sold gp: ", stocks_price_format(len(inventory_gp_test) * data_gp_test[t]), " Profit: " + stocks_price_format(profit), "Investment= ", stocks_price_format(investment_test))
            for i in range(len(inventory_gp_test)):
                inventory_gp_test.pop(0)  # empty the inventory after selling everything
        if action == 0:  # hold
            print("AI Trader is holding........", "Investment= ", stocks_price_format(investment_test))
        next_state = state_creator(data_gp_test, t + 1, window_size, inventory_gp_test, investment_test, episode, trend_gp_test)
        if investment_test <= 0 and len(inventory_gp_test) == 0:  # check for bankruptcy
            print("########################")
            print("TOTAL PROFIT: {}".format(total_profit))
            print("AI Trader went bankrupt")
            print("########################")
            break  # end the session on bankruptcy
        state = next_state  # move to the next state
    print("########################")
    print("TOTAL TEST PROFIT: {}".format(total_profit))
    print("########################")


test()

# plot the buy/sell decisions on the test price series
stock_price = data_gp_test[10:]
plt.plot(stock_price, color='blue', label='WMT')
plt.scatter(timesteps_sells, sells, color='red', label='sell')
plt.scatter(timesteps_buys, buys, color='black', label='buy')
plt.title('WMT stock')
plt.xlabel('Time')
plt.ylabel('WMT Price')
plt.legend()
plt.show()
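# The scalars and histograms logged via SummaryWriter above can be inspected with TensorBoard,
# e.g. `tensorboard --logdir runs` (assuming the default log directory).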