# Random Forest Code.R

library(randomForest)
library(quantmod)
library(ggplot2)
library(dplyr)
library(TTR)
library(caret)


########################################################################################
##########################################################################################

# ---- Load prices and build the feature set ----
# getSymbols() downloads daily SPY bars from Yahoo Finance; the xts object
# `SPY` used below is created by this call.
getSymbols("SPY", src = "yahoo", from = "2016-01-01")

# Start from the closing price (column `SPY.Close`) and drop missing rows.
data <- na.omit(Cl(SPY))

# Lagged closes (1, 2 and 3 observations back).
# dplyr is attached after quantmod/xts, so a bare `lag()` resolves to
# dplyr::lag(), which does not dispatch to the xts method (xts warns about
# exactly this when both packages are loaded). Calling the stats generic
# explicitly guarantees lag.xts() is used and the date index is kept.
data$lags1 <- stats::lag(data$SPY.Close, k = 1)
data$lags2 <- stats::lag(data$SPY.Close, k = 2)
data$lags3 <- stats::lag(data$SPY.Close, k = 3)

# Technical indicators derived from the close.
data$SMA20 <- SMA(data$SPY.Close, n = 20)  # 20-period simple moving average
data$RSI14 <- RSI(data$SPY.Close, n = 14)  # 14-period Relative Strength Index
data$MACD <- MACD(data$SPY.Close)$macd     # MACD line only (signal line dropped)
data$EMA10 <- EMA(data$SPY.Close, n = 10)  # 10-period exponential moving average
data$Volume <- Vo(SPY)                     # traded volume, aligned on the date index

# NOTE(review): SMA20, RSI14, MACD, EMA10 and Volume are taken from the same
# row as the SPY.Close value the model is later asked to predict, so they
# carry same-day information. Confirm whether they should be lagged by one
# observation to avoid look-ahead bias.

# Drop the leading rows where lags/indicators are still NA.
data <- na.omit(data)

# ---- Chronological 80/20 train/test split ----
# The first 80% of rows (oldest observations) train the model; the remainder
# is held out. Rows are not shuffled, so the test set is strictly later in
# time than the training set.
n_train <- floor(nrow(data) * 0.8)

# Guard the split: with an empty `train_indices`, `data[-train_indices, ]`
# would return zero rows instead of every row, and a full-length one would
# leave nothing to test on.
stopifnot(n_train >= 1, n_train < nrow(data))

train_indices <- seq_len(n_train)
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]

# Quick inspection of the hold-out set (head() instead of printing every row).
names(test_data)
head(test_data)

str(train_data)

# Convert both sets to plain data frames if they are not already; the dates
# move from the xts index to the row names, which later code reads back with
# row.names().
if (!is.data.frame(train_data)) {
  train_data <- as.data.frame(train_data)
}

if (!is.data.frame(test_data)) {
  test_data <- as.data.frame(test_data)
}



# ---- Fit the random forest and score the hold-out set ----
# Every column except the response (SPY.Close) is a candidate predictor.
num_predictors <- ncol(train_data) - 1

# Fix the RNG state so the fitted forest is reproducible.
set.seed(123)
rf_model <- randomForest(
  SPY.Close ~ .,
  data = train_data,
  ntree = 500,
  # Variables tried at each split: the predictor count clamped to [1, 5].
  mtry = min(max(num_predictors, 1), 5),
  importance = TRUE
)

# Predictions for the held-out rows.
test_pred <- predict(rf_model, newdata = test_data)

# Inspect the training frame and the fitted object.
str(train_data)
summary(rf_model)

# Variable importance, as a table and as a plot.
importance(rf_model)
varImpPlot(rf_model)


# ---- Align predictions with actuals and plot them ----
# Helper: expose a data frame's row names (the observation dates) as a
# leading `Date` column, keeping all other columns in their original order.
add_date_column <- function(df) {
  df %>%
    mutate(Date = as.Date(row.names(df))) %>%
    select(Date, everything())
}

# Predictions become a one-column data frame named `test_pred`, then both
# frames get the Date key used for the join below.
test_pred <- add_date_column(data.frame(test_pred))
test_data <- add_date_column(test_data)

# One row per date carrying the actual close, the features and the prediction.
final_plot_data <- merge(test_data, test_pred, by = "Date")
names(final_plot_data)

# Actual close in red, predicted close in blue.
ggplot(data = final_plot_data, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = SPY.Close), color = "red", linewidth = 0.2) +
  geom_line(mapping = aes(y = test_pred), color = "blue", linewidth = 0.2) +
  labs(
    title = "Comparison of Actual and Predicted SPY Stock Prices",
    x = "Date",
    y = "Stock Price"
  )


#############################Simulation###########################################################


# ---- Simulation setup ----
# Rename by column name rather than by position: a positional
# `names(x) <- c(...)` silently mislabels columns if their order or count
# ever changes, whereas rename() fails loudly when a column is missing.
# Resulting names are unchanged: Date, Actual, Lags1, lags2, lags3, SMA20,
# RSI14, MACD, EMA10, Volume, Predicted.
final_plot_data <- dplyr::rename(
  final_plot_data,
  Actual = SPY.Close,
  Lags1 = lags1,
  Predicted = test_pred
)

# Per-row bookkeeping columns filled in by the trading loop. Numeric columns
# start as NA_real_ so their type does not change on first assignment.
final_plot_data$Position <- rep("None", nrow(final_plot_data))       # "Long" or "None"
final_plot_data$Entry_Price <- rep(NA_real_, nrow(final_plot_data))  # entry price of the open trade
final_plot_data$Portfolio <- rep(NA_real_, nrow(final_plot_data))    # total account value

# Account state: no shares held, all capital in cash.
shares <- 0
cash <- 10000  # Starting cash
head(final_plot_data)

### Trading Loop for Long Only Strategy
# Rules, applied row by row from the second row onward:
#   * flat and Predicted > Actual  -> buy with all cash at the current price
#   * long and gain >= take-profit or loss <= stop-loss -> sell everything
#   * otherwise keep the current state
# Exit thresholds, in percent relative to the entry price.
take_profit_pct <- 5
stop_loss_pct <- -1

# The loop starts at row 2, so record the starting cash on row 1; otherwise
# the equity curve begins with an NA.
if (nrow(final_plot_data) > 0) {
  final_plot_data$Portfolio[1] <- cash
}

# seq_len(n)[-1] is empty when there are fewer than two rows, whereas
# 2:nrow() would count down (2, 1) and index past the data.
for (i in seq_len(nrow(final_plot_data))[-1]) {
  current_price <- final_plot_data$Actual[i]
  predicted_price <- final_plot_data$Predicted[i]
  # Entry price carried over from the previous row; NA means no open position.
  entry_price <- final_plot_data$Entry_Price[i - 1]

  if (is.na(entry_price)) {  # No open position
    if (predicted_price > current_price) {  # Buy signal
      final_plot_data$Position[i] <- "Long"
      final_plot_data$Entry_Price[i] <- current_price
      shares <- cash / current_price
      cash <- 0  # Invest all cash into shares
    }
  } else {  # Already holding a position
    gain_loss_pct <- (current_price - entry_price) / entry_price * 100

    if (gain_loss_pct >= take_profit_pct || gain_loss_pct <= stop_loss_pct) {
      cash <- shares * current_price  # Close the position
      shares <- 0
      final_plot_data$Position[i] <- "None"
      final_plot_data$Entry_Price[i] <- NA
    } else {
      final_plot_data$Position[i] <- "Long"  # Continue holding
      final_plot_data$Entry_Price[i] <- entry_price
    }
  }

  # Mark the account to market: cash when flat, share value when invested.
  final_plot_data$Portfolio[i] <- if (shares == 0) cash else shares * current_price
}

### Output the head of the modified data for verification
head(final_plot_data)

# Equity curve of the simulated strategy. The line colour is a constant, not
# a mapped aesthetic, so no legend title is set for it.
ggplot(final_plot_data, aes(x = Date)) +
  geom_line(aes(y = Portfolio), color = "green") +
  labs(title = "Portfolio Value Over Time", x = "Date", y = "Total Value") +
  theme_dark()

# Buy-and-hold benchmark: ending value of 10000 invested at the first test
# close and held to the last one. The prices are read from the data frame
# directly instead of relying on `current_price` left over from the trading
# loop.
BHR <- 10000 * (tail(final_plot_data$Actual, 1) / head(final_plot_data$Actual, 1))
########################################################################################