-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRandom Forest Code.R
144 lines (104 loc) · 4.51 KB
/
Random Forest Code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
library(randomForest)
library(quantmod)
library(ggplot2)
library(dplyr)
library(TTR)
library(caret)
########################################################################################
##########################################################################################
# Download SPY price history from Yahoo Finance (from 2016-01-01 onward) and
# build the modelling table: the closing price (the prediction target) plus
# lagged closes, technical indicators and volume used as predictors.
getSymbols("SPY", src = "yahoo", from = "2016-01-01")
data <- Cl(SPY) # Use closing prices
data <- na.omit(data) # Remove NA values
# Create the lagged closing prices.
# stats::lag() is called explicitly: dplyr is attached after quantmod/xts and
# masks lag() with a non-generic function that never dispatches to the xts
# method, so a bare lag() would not perform a time-series lag on this object.
data$lags1 <- stats::lag(data$SPY.Close, k = 1)
data$lags2 <- stats::lag(data$SPY.Close, k = 2)
data$lags3 <- stats::lag(data$SPY.Close, k = 3)
# NOTE(review): the indicators and volume below are taken from the same row
# as the close the model is later asked to predict, so the predictors embed
# the target itself. Confirm whether they should be lagged by one period too.
data$SMA20 <- SMA(data$SPY.Close, n = 20) # 20-period simple moving average
data$RSI14 <- RSI(data$SPY.Close, n = 14) # 14-period Relative Strength Index
data$MACD <- MACD(data$SPY.Close)$macd # MACD line only (signal line dropped)
data$EMA10 <- EMA(data$SPY.Close, n = 10) # 10-period exponential moving average
data$Volume <- Vo(SPY)
data <- na.omit(data) # Drop the warm-up rows left NA by the lags and indicators
# Chronological 80/20 split: the first 80% of rows train the model and the
# remaining rows are held out for testing.
# seq_len(floor(...)) yields whole-number row indices and an empty vector
# (instead of the descending 1:0) when there are too few rows.
train_indices <- seq_len(floor(nrow(data) * 0.8))
# Select the test rows by positive index: data[-integer(0), ] would select
# nothing rather than everything if the training set were empty.
test_indices <- setdiff(seq_len(nrow(data)), train_indices)
train_data <- data[train_indices, ]
test_data <- data[test_indices, ]
# Diagnostics: show the held-out rows and the structure of the training set
print(test_data)
names(test_data)
head(test_data)
str(train_data)
# Convert the data to a data frame if it's not already one
if (!is.data.frame(train_data)) {
  train_data <- as.data.frame(train_data)
}
# Convert the test_data as well
if (!is.data.frame(test_data)) {
  test_data <- as.data.frame(test_data)
}
# Fit a random-forest regression of SPY.Close on every other column of the
# training table, then score the held-out rows.
num_predictors <- ncol(train_data) - 1 # every column except the target
# Candidate variables per split: clamp the predictor count to the range [1, 5]
mtry_capped <- min(5, max(1, num_predictors))
# Seed the RNG first so the fitted forest is reproducible
set.seed(123)
rf_model <- randomForest(
  SPY.Close ~ .,
  data = train_data,
  ntree = 500,
  mtry = mtry_capped,
  importance = TRUE
)
test_pred <- predict(rf_model, newdata = test_data)
# Inspect the training table, the fitted model and its variable importance
str(train_data)
summary(rf_model)
importance(rf_model)
varImpPlot(rf_model)
# Attach a Date column (parsed from the row names) to both the predictions
# and the held-out data, join them on Date, and plot actual vs. predicted.

# Returns `df` with a leading Date column built from its row names.
with_date_column <- function(df) {
  df %>%
    mutate(Date = as.Date(row.names(.))) %>%
    select(Date, everything())
}

test_pred <- data.frame(test_pred)
test_pred <- with_date_column(test_pred)
test_data <- with_date_column(test_data)

final_plot_data <- merge(test_data, test_pred, by = "Date")
names(final_plot_data)

# Red: actual close; blue: model prediction
ggplot(final_plot_data, aes(x = Date)) +
  geom_line(aes(y = SPY.Close), color = "red", linewidth = 0.2) +
  geom_line(aes(y = test_pred), color = "blue", linewidth = 0.2) +
  labs(
    title = "Comparison of Actual and Predicted SPY Stock Prices",
    x = "Date",
    y = "Stock Price"
  )
#############################Simulation###########################################################
# Long-only backtest over the merged test-period table.
# Rules, applied row by row from the second row onward:
#   * flat and Predicted > Actual  -> invest all cash at the current price
#   * holding and the gain since entry is >= take_profit_pct or
#     <= stop_loss_pct            -> sell everything at the current price
#   * otherwise                    -> keep the current state
# Portfolio records the account value (cash, or shares marked at the current
# price) after each row has been processed.
names(final_plot_data) <- c(
  "Date", "Actual", "Lags1", "lags2", "lags3", "SMA20",
  "RSI14", "MACD", "EMA10", "Volume", "Predicted"
)
n_rows <- nrow(final_plot_data)
final_plot_data$Position <- rep("None", n_rows) # Tracks whether we are long or flat
final_plot_data$Entry_Price <- rep(NA_real_, n_rows) # Price at which we entered a trade
final_plot_data$Portfolio <- rep(NA_real_, n_rows) # Account value after each row

take_profit_pct <- 5 # Close the position once it is up by this many percent
stop_loss_pct <- -1 # Close the position once it is down by this many percent
shares <- 0
cash <- 10000 # Starting cash

# No trade can happen on the first row, so the account is worth the starting
# cash there; recording it keeps the equity curve free of a leading NA.
if (n_rows > 0) {
  final_plot_data$Portfolio[1] <- cash
}
head(final_plot_data)

### Trading Loop for Long Only Strategy
# seq_len(n_rows)[-1] is empty when there are fewer than two rows, whereas
# 2:n_rows would count down (2, 1) and index out of range.
for (i in seq_len(n_rows)[-1]) {
  current_price <- final_plot_data$Actual[i]
  predicted_price <- final_plot_data$Predicted[i]
  entry_price <- final_plot_data$Entry_Price[i - 1] # NA means no open position

  if (is.na(entry_price)) { # No open position
    if (predicted_price > current_price) { # Buy signal
      final_plot_data$Position[i] <- "Long"
      final_plot_data$Entry_Price[i] <- current_price
      shares <- cash / current_price
      cash <- 0 # Invest all cash into shares
    }
  } else { # Already holding a position
    gain_loss_pct <- (current_price - entry_price) / entry_price * 100
    if (gain_loss_pct >= take_profit_pct || gain_loss_pct <= stop_loss_pct) {
      cash <- shares * current_price # Close the position
      shares <- 0
      final_plot_data$Position[i] <- "None"
      final_plot_data$Entry_Price[i] <- NA
    } else {
      final_plot_data$Position[i] <- "Long" # Continue holding
      final_plot_data$Entry_Price[i] <- entry_price
    }
  }

  # Update portfolio value
  final_plot_data$Portfolio[i] <- if (shares == 0) cash else shares * current_price
}
### Output the head of the modified data for verification
head(final_plot_data)
# Equity curve of the simulated strategy
ggplot(final_plot_data, aes(x = Date)) +
  geom_line(aes(y = Portfolio), color = "green") +
  labs(title = "Portfolio Value Over Time", x = "Date", y = "Total Value", color = "Legend") +
  theme_dark()
# Buy and hold return for the period: value of 10000 invested at the first
# price and held to the last one. The last price is read from the data frame
# itself instead of relying on a variable left over from the trading loop.
first_price <- head(final_plot_data$Actual, 1)
last_price <- tail(final_plot_data$Actual, 1)
BHR <- 10000 * (1 + (last_price / first_price - 1))
########################################################################################