-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstock_prices_anomaly_detection.R
104 lines (88 loc) · 3.08 KB
/
stock_prices_anomaly_detection.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
# Stock Prices Anomaly Detection.ipynb
#
# Automatically generated by Colaboratory.
#
# Original file is located at
# https://colab.research.google.com/drive/1i923pG09SlhpnZ_n_AckL2cLTmhYwHg6
#
# Loading Libraries ----
#
# Package setup.
#
# Each dependency is installed only when it is missing, so re-running the
# script does not reinstall packages on every run. Every package is then
# attached with library(), which stops with an error when a package is
# unavailable; require() would only return FALSE and let the script fail
# later at the first call into the missing package.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

# anomalize and AnomalyDetection are installed from their GitHub repositories.
if (!requireNamespace("anomalize", quietly = TRUE)) {
  devtools::install_github("business-science/anomalize")
}
if (!requireNamespace("AnomalyDetection", quietly = TRUE)) {
  devtools::install_github("twitter/AnomalyDetection")
}

# The remaining packages are installed from the configured repository in a
# single install.packages() call covering only the ones that are missing.
repo_pkgs <- c("quantmod", "dplyr", "tibbletime", "ggplot2", "lubridate")
is_installed <- vapply(
  repo_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(repo_pkgs[!is_installed])
}

# Attach in the same order as before so function masking is unchanged.
library(anomalize)
library(AnomalyDetection)
library(quantmod)
library(dplyr)
library(tibbletime)
library(ggplot2)
library(lubridate)
# Getting Data ----
# Fetch the GS price history from Yahoo, from 2016-01-01 up to the current
# date. With auto.assign = FALSE the result is returned rather than assigned
# by getSymbols(), so it is captured in `gsachs` here.
gsachs <- getSymbols(
  Symbols = "GS",
  src = "yahoo",
  from = "2016-01-01",
  to = as.character(Sys.Date()),
  auto.assign = FALSE
)

# Keep only the index (as `date`) and the GS.Close column (as numeric `close`).
gsachs_df <- data.frame(
  date = index(gsachs),
  close = as.numeric(gsachs[, "GS.Close"])
)
gsachs_tbl <- tibble::as_tibble(gsachs_df)

# Quick inspection: dimensions, per-column summary, and the last rows.
dim(gsachs_tbl)
summary(gsachs_tbl)
tail(gsachs_tbl)
#
# Business Science: Tidy Anomaly Detection ----
#
# Normal Anomaly Detection ----
#
# Set the repr plot size options (width 25, height 10) before plotting.
options(repr.plot.height = 10, repr.plot.width = 25)

# Decompose `close`, flag anomalies on the remainder with alpha = 0.5,
# recompose, and plot the result. The plot is not stored; it is displayed by
# top-level auto-printing.
gsachs_tbl %>%
  time_decompose(close) %>%
  anomalize(remainder, alpha = 0.5) %>%
  time_recompose() %>%
  plot_anomalies(
    time_recomposed = TRUE,
    ncol = 3,
    alpha_dots = 0.5
  )
# Decomposed Anomaly Detection with Trend and Seasonality Adjusted ----
# Replace gsachs_tbl with the output of prep_tbl_time(), ungrouped. Every
# later section of the script works on this converted table.
gsachs_tbl <- gsachs_tbl %>%
  prep_tbl_time() %>%
  ungroup()

# Decompose `close` with an automatic frequency and a "2 weeks" trend span,
# flag anomalies on the remainder with anomalize()'s defaults, then recompose.
gsachs_tbl_anomalized <- gsachs_tbl %>%
  time_decompose(
    close,
    frequency = "auto",
    trend = "2 weeks"
  ) %>%
  anomalize(remainder) %>%
  time_recompose()

# Build the decomposition plot and give it a title.
gsachs_decomposed <- plot_anomaly_decomposition(gsachs_tbl_anomalized) +
  ggtitle("Decomposed Anomaly Plot")

# Display the plot (top-level auto-print).
gsachs_decomposed
# Detection with 5% and 15% Anomalies Allowed ----
# Two plots that differ only in the max_anoms value passed to anomalize();
# alpha is 0.3 in both.
#
# NOTE: despite its name, `anom_20` is built with max_anoms = 0.15 and is
# titled "15% Anomalies". The name is kept so existing references still work.
anom_20 <- gsachs_tbl %>%
  time_decompose(close) %>%
  anomalize(
    remainder,
    alpha = 0.3,
    max_anoms = 0.15
  ) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  ggtitle("15% Anomalies")

anom_5 <- gsachs_tbl %>%
  time_decompose(close) %>%
  anomalize(
    remainder,
    alpha = 0.3,
    max_anoms = 0.05
  ) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  ggtitle("5% Anomalies")

# Display both plots, 15% first and then 5% (top-level auto-print).
anom_20
anom_5
# STL Decomposition Method vs Twitter Decomposition Method ----
# Both plots use the same settings (frequency "1 week", trend "3 months",
# default anomalize() arguments) and differ only in the `method` passed to
# time_decompose().

# STL decomposition
p1 <- gsachs_tbl %>%
  time_decompose(
    close,
    method = "stl",
    frequency = "1 week",
    trend = "3 months"
  ) %>%
  anomalize(remainder) %>%
  plot_anomaly_decomposition() +
  ggtitle("STL Decomposition")

# Twitter decomposition
p2 <- gsachs_tbl %>%
  time_decompose(
    close,
    method = "twitter",
    frequency = "1 week",
    trend = "3 months"
  ) %>%
  anomalize(remainder) %>%
  plot_anomaly_decomposition() +
  ggtitle("Twitter Decomposition")

# Display the STL plot, then the Twitter plot (top-level auto-print).
p1
p2