-
Notifications
You must be signed in to change notification settings - Fork 3
/
forecast-package-covidhub-forecast.R
99 lines (86 loc) · 3.23 KB
/
forecast-package-covidhub-forecast.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env Rscript
tictoc::tic()
suppressPackageStartupMessages(library(tidyverse))
source("covidhub-common.R")
RNGkind("L'Ecuyer-CMRG")
set.seed(1)
mconfig <- ini::read.ini("model.ini")
eval_string <- function(x){
str2lang(x) %>% eval()
}
mconfig$model_pars <- map(mconfig$model_pars, eval_string)
forecast_dates <- Sys.getenv("fdt") %>% as.Date()
hopdir <- file.path("hopkins", forecast_dates)
tdat <- load_hopkins(hopdir) #%>% filter(str_detect(location, "^13"))
rw_forecast <- function(df, fdt, npaths = 100,
h = 4, tailn = Inf, include_drift = FALSE){
stopifnot(npaths > 0)
last_end_date <- max(df$target_end_date)
stopifnot(fdt - last_end_date < lubridate::ddays(3))
df1 <- df %>% filter(target_end_date <= fdt)
## remove oobservations more that 2 sds away from mean
win <- ts(df1$value) %>% tail(n = max(tailn, 14))
sigma <- sd(win)
m <- mean(win)
is_outlier <- abs(win - m) / (2 * sigma) > 1
win[is_outlier] <- NA
y <- tail(win[!is.na(win)], n = tailn)
stopifnot(length(y) == tailn)
object <- forecast::naive(y, h = 1)$model
sim <- matrix(NA, nrow = npaths, ncol = h)
for (i in 1:npaths) {
sim[i, ] <- simulate(object, nsim = h, bootstrap = FALSE)
}
sim1 <- (sim + abs(sim)) / 2 #to zero out any negative values
sim_dates <- seq(last_end_date + 7, last_end_date + h *7, by = 7)
colnames(sim1) <- as.character(sim_dates)
sim12 <- cbind(Rep = seq_len(nrow(sim1)), sim1) %>% as_tibble()
simdf <- sim12 %>% pivot_longer(-Rep, names_to = "Date",
values_to = "value")
simdf %>% mutate(Date = lubridate::ymd(Date))
}
reshape_paths_like_pompout <- function(x){
x %>% mutate(target_type = recode(target_type,
`wk ahead inc death`= "deaths",
`wk ahead inc case` = "cases")) %>%
pivot_wider(id_cols = c(Rep, Date), names_from = "target_type",
values_from = "value")
}
sim_paths <- function(fdt, tdat, mname, odir, ...) {
tdat1 <- tdat %>%
filter(target_type %in% c("wk ahead inc death", "wk ahead inc case")) %>%
group_by(location, target_type) %>%
arrange(target_end_date) %>%
nest() %>%
mutate(paths = map(data, rw_forecast, fdt = fdt, ...)) %>%
select(-data) %>%
unnest(cols = paths) %>%
group_by(location) %>%
nest() %>%
mutate(paths2 = map(data, reshape_paths_like_pompout)) %>%
select(-data) %>%
mutate(fcst_df = map2(paths2, location, paths_to_forecast, hop = tdat,
fdt = fdt))
full <- bind_rows(tdat1$fcst_df)
fname <- paste0(fdt, "-CEID-", mname, ".csv")
if (!dir.exists(odir)) {
dir.create(odir)
}
opath <- file.path(odir, fname)
write_csv(full, opath)
}
paths <- map(forecast_dates,
sim_paths,
tdat = tdat,
npaths = mconfig$model_pars$npaths,
tailn = mconfig$model_pars$tailn,
include_drift = mconfig$model_pars$include_drift,
odir = "forecasts",
mname = "Walk")
## Produce metrics
time <- tictoc::toc()
walltime <- list(wall = time$toc - time$tic)
dir.create("metrics")
mpath <- file.path("metrics", paste0(forecast_dates,
"-forecast-calc-time.json"))
jsonlite::write_json(walltime, mpath, auto_unbox = TRUE)