-
Notifications
You must be signed in to change notification settings - Fork 3
/
quantile-score.R
executable file
·78 lines (59 loc) · 2.62 KB
/
quantile-score.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env Rscript
# Setup environment
suppressPackageStartupMessages(library(tidyverse))
source("covidhub-common.R")
ddt <- Sys.getenv("ddt")
targ_dir <- file.path("hopkins", ddt)
output_dir <- "forecasts"
datf <- load_hopkins(targ_dir) %>% rename(true_value = "value")
forecast_paths <- dir(output_dir, full.names = TRUE)
# Calculate errors
score <- function(path, tdf2) {
fcst <- read_forecast(path) %>% mutate(target_type = str_remove(target, "^\\d+ "))
ptdf <- fcst %>% filter(type == "point") %>%
left_join(tdf2, by = c("target_end_date", "target_type", "location")) %>%
filter(!is.na(true_value)) %>%
mutate(error = value - true_value)
probdf <- fcst %>% filter(type == "quantile") %>%
left_join(tdf2, by = c("target_end_date", "target_type", "location")) %>%
filter(!is.na(true_value)) %>%
mutate(is_below = true_value < value,
brier_loss = (is_below - quantile) ^ 2) %>%
mutate(pinball_loss = purrr::map2_dbl(true_value - value, quantile,
verification::check.func))
probsumdf <- probdf %>% group_by(forecast_date, target) %>%
summarise(mean_quantile_score = mean(pinball_loss),
mean_brier_score = mean(brier_loss), .groups = "drop")
list(point = ptdf, prob = probdf, probsum = probsumdf)
}
scores <- map(forecast_paths, score, tdf2 = datf)
residuals <-
map(scores, "prob") %>%
bind_rows() %>%
mutate(loc_type = case_when(location == "US" ~ "national",
nchar(location) == 2 ~ "state",
TRUE ~ "county" ))
# Summarize errors
summary <- list()
summary$by_loc_type <-
residuals %>%
group_by(loc_type) %>%
summarise(mean_qs = mean(pinball_loss), .groups = "drop")
summary$by_loc_type_targ_type <-
residuals %>%
group_by(loc_type, target_type) %>%
summarise(mean_qs = mean(pinball_loss), .groups = "drop")
summary$by_loc_targ_fdt <-
residuals %>%
group_by(loc_type, target_type, forecast_date) %>%
summarise(mean_qs = mean(pinball_loss), .groups = "drop")
# Create output
dir.create("metrics")
resids_path <- file.path("metrics", paste0(ddt, "-residuals.rds"))
saveRDS(residuals, resids_path)
summary_plot_path <- file.path("metrics", paste0(ddt, "-score-by-loc-type.csv"))
write_csv(summary$by_loc_type, path = summary_plot_path)
summary_plot_path <- file.path("metrics", paste0(ddt, "-score-by-loc-type-targ-type.csv"))
write_csv(summary$by_loc_type_targ_type, path = summary_plot_path)
summary_plot_path <- file.path("metrics", paste0(ddt, "-score-by-loc-type-targ-type-forecast-date.csv"))
write_csv(summary$by_loc_targ_fdt, path = summary_plot_path)