Skip to content

Commit

Permalink
finalize how_to_cite
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed Dec 2, 2024
1 parent 0e9613c commit 146d9ba
Show file tree
Hide file tree
Showing 22 changed files with 421 additions and 81 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# mvgam 1.1.4 (development version; not yet on CRAN)
## New functionalities
* Added the `how_to_cite.mvgam()` function to generate a scaffold methods description of fitted models, which can hopefully make it easier for users to fully describe their programming environment
* Improved various plotting functions by returning `ggplot` objects in place of base plots (thanks to @mhollanders #38)
* Added the brier score (`score = 'brier'`) as an option in `score.mvgam_forecast()` for scoring forecasts of binary variables when using `family = bernoulli()` (#80)
* Added `augment()` function to add residuals and fitted values to an mvgam object's observed data (thanks to @swpease #83)
Expand Down
3 changes: 2 additions & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ utils::globalVariables(c("y", "year", "smooth_vals", "smooth_num",
"level", "sim_hilbert_gp", "trend_model",
"jags_path", "x", "elpds", "pareto_ks",
"value", "threshold", "colour", "resids",
"c_dark", "eval_timepoints"))
"c_dark", "eval_timepoints", "yqlow",
"ymidlow", "ymidhigh", "yqhigh", "preds"))
179 changes: 125 additions & 54 deletions R/how_to_cite.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
#'text returned by this function to be a completely adequate methods section, it is only
#'meant to get you started.
#'@return An object of class \code{how_to_cite} containing a text description of the
#'methods as well as lists of primary and additional references
#'methods as well as lists of both primary and additional references
#'@author Nicholas J Clark
#'@seealso \code{\link[utils]{citation}}, \code{\link{mvgam}}, \code{\link{jsdgam}}
#' @examples
#' \donttest{
#' \dontrun{
#' # Simulate 4 time series with hierarchical seasonality
#' # and a VAR(1) dynamic process
#' set.seed(111)
#' set.seed(0)
#' simdat <- sim_mvgam(seasonality = 'hierarchical',
#' trend_model = VAR(cor = TRUE),
#' family = gaussian())
Expand All @@ -30,7 +30,6 @@
#' data = simdat$data_train,
#' family = gaussian(),
#' trend_model = VAR(cor = TRUE),
#' noncentred = TRUE,
#' chains = 2)
#' how_to_cite(mod1)
#'
Expand All @@ -43,6 +42,13 @@
#' family = gaussian(),
#' chains = 2)
#' how_to_cite(mod2)
#'
#' # Repeat using meanfield variational inference
#' mod3 <- mvgam(y ~ gp(x2, k = 12),
#' data = dat,
#' family = gaussian(),
#' algorithm = 'meanfield')
#' how_to_cite(mod3)
#' }
#'@export
how_to_cite <- function(object, ...){
Expand Down Expand Up @@ -71,7 +77,7 @@ how_to_cite.mvgam <- function(object, ...){
# mvgam-specific methods
mvgam_text <- paste0(
"We used the R package mvgam (version ",
packageVersion("mvgam"),
utils::packageVersion("mvgam"),
"; Clark & Wells, 2023) to construct, fit and interrogate the model.",
" mvgam fits Bayesian State-Space models that can include flexible",
" predictor effects in both the process and observation components",
Expand All @@ -82,16 +88,50 @@ how_to_cite.mvgam <- function(object, ...){
citations[[1]] <- "Clark, NJ and Wells K (2022). Dynamic Generalized Additive Models (DGAMs) for forecasting discrete ecological time series. Methods in Ecology and Evolution, 14, 771-784. doi.org/10.1111/2041-210X.13974"
citations[[2]] <- "Bürkner, PC (2017). brms: An R Package for Bayesian Multilevel Models Using Stan. Journal of Statistical Software, 80(1), 1-28. doi:10.18637/jss.v080.i01"
citations[[3]] <- "Wood, SN (2017). Generalized Additive Models: An Introduction with R (2nd edition). Chapman and Hall/CRC."
citations[[4]] <- "Wang W and Yan J (2021). “Shape-Restricted Regression Splines with R Package splines2.” Journal of Data
Science, 19(3), 498-517. doi:10.6339/21-JDS1020 <https://doi.org/10.6339/21-JDS1020>."
citations[[4]] <- "Wang W and Yan J (2021). Shape-Restricted Regression Splines with R Package splines2. Journal of Data Science, 19(3), 498-517. doi:10.6339/21-JDS1020 <https://doi.org/10.6339/21-JDS1020>."

# Any specials; first check whether this model used a VAR / VARMA process
specials_text <- NULL
trend_model <- attr(object$model_data, 'trend_model')
if(trend_model %in% c('VAR',
'VARcor',
'VARhiercor',
'VAR1',
'VAR1cor',
'VAR1hiercor',
'VARMA',
'VARMAcor',
'VARMA1,1cor')){
specials_text <- c(
specials_text,
" To encourage stability and prevent forecast variance from increasing indefinitely, we enforced stationarity of the Vector Autoregressive process following methods described in Heaps (2023)."
)
citations <- append(
citations,
list("Heaps, SE (2023). Enforcing stationarity through the prior in vector autoregressions. Journal of Computational and Graphical Statistics 32, 74-83.")
)
}

# Check for approximate GPs
if(!is.null(attr(object$mgcv_model, 'gp_att_table')) |
!is.null(attr(object$trend_mgcv_model, 'gp_att_table'))){
specials_text <- c(
specials_text,
" Gaussian Process functional effects were estimated using a low-rank Hilbert space approximation following methods described in Riutort-Mayol et al. (2023)."
)
citations <- append(
citations,
list("Riutort-Mayol, G, Bürkner, PC, Andersen, MR, Solin, A and Vehtari, A (2023). Practical Hilbert space approximate Bayesian Gaussian processes for probabilistic programming. Statistics and Computing 33, 1. https://doi.org/10.1007/s11222-022-10167-2")
)
}

# Stan-specific methods
citations[[5]] <- "Carpenter, B, Gelman, A, Hoffman, MD, Lee, D, Goodrich, B, Betancourt, M, Brubaker, M, Guo, J, Li, P and Riddell, A (2017). Stan: A probabilistic programming language. Journal of Statistical Software 76."
citations <- append(citations, list("Carpenter, B, Gelman, A, Hoffman, MD, Lee, D, Goodrich, B, Betancourt, M, Brubaker, M, Guo, J, Li, P and Riddell, A (2017). Stan: A probabilistic programming language. Journal of Statistical Software 76."))

stan_text <-
paste0(
" We estimated posterior distributions for model parameters",
" with Hamiltonian Monte Carlo in Stan"
" The mvgam-constructed model and observed data",
" were passed to the probabilistic programming environment Stan"
)

if(object$backend == 'cmdstanr'){
Expand All @@ -101,85 +141,116 @@ how_to_cite.mvgam <- function(object, ...){
cmdstanr::cmdstan_version(),
"; Carpenter et al. 2017, Stan Development Team ",
current_year,
"), specifically with the cmdstanr interface (Gabry & Češnovar, 2021)."
"), specifically through the cmdstanr interface (Gabry & Češnovar, 2021)."
)
citations[[6]] <- paste0(
"Gabry J, Češnovar R, Johnson A, and Bronder S (",
current_year,
"). cmdstanr: R Interface to 'CmdStan'. https://mc-stan.org/cmdstanr/, https://discourse.mc-stan.org."
citations <- append(
citations,
list(paste0(
"Gabry J, Češnovar R, Johnson A, and Bronder S (",
current_year,
"). cmdstanr: R Interface to 'CmdStan'. https://mc-stan.org/cmdstanr/, https://discourse.mc-stan.org."
)
)
)
} else {
stan_text <- paste0(
stan_text,
" (version ",
rstan::stan_version(),
"; Carpenter et al. 2017)",
", specifically with the rstan interface (Stan Development Team ",
", specifically through the rstan interface (Stan Development Team ",
current_year,
")"
)
citations[[6]] <- paste0(
"Stan Development Team (",
current_year,
"). RStan: the R interface to Stan. R package version ",
packageVersion("rstan"),
". https://mc-stan.org/."
)
citations <- append(
citations,
list(paste0(
"Stan Development Team (",
current_year,
"). RStan: the R interface to Stan. R package version ",
utils::packageVersion("rstan"),
". https://mc-stan.org/."
)
))
}

# Any specials; first check whether this model used a VAR / VARMA process
specials_text <- NULL
trend_model <- attr(object$model_data, 'trend_model')
if(trend_model %in% c('VAR',
'VARcor',
'VARhiercor',
'VAR1',
'VAR1cor',
'VAR1hiercor',
'VARMA',
'VARMAcor',
'VARMA1,1cor')){
specials_text <- c(
specials_text,
" To encourage stability and prevent forecast variance from increasing indefinitely, we enforced stationarity of the Vector Autoregressive process following methods described in Heaps (2023)."
if(object$algorithm == 'sampling'){
stan_text <- paste0(
stan_text,
" We ran ",
object$model_output@sim$chains,
" Hamiltonian Monte Carlo chains for ",
object$model_output@sim$warmup,
" warmup iterations and ",
object$model_output@sim$iter - object$model_output@sim$warmup,
" sampling iterations for joint posterior estimation.",
" Rank normalized split Rhat (Vehtari et al. 2021) and effective",
" sample sizes were used to monitor convergence."
)
citations <- append(
citations,
list("Heaps, SE (2023). Enforcing stationarity through the prior in vector autoregressions. Journal of Computational and Graphical Statistics 32, 74-83.")
list("Vehtari A, Gelman A, Simpson D, Carpenter B, and Bürkner P (2021). “Rank-normalization, folding, and localization: An improved Rhat for assessing convergence of MCMC (with discussion).” Bayesian Analysis 16(2) 667-718. https://doi.org/10.1214/20-BA1221.")
)
}

# Check for approximate GPs
if(!is.null(attr(object$mgcv_model, 'gp_att_table')) |
!is.null(attr(object$trend_mgcv_model, 'gp_att_table'))){
specials_text <- c(
specials_text,
" Gaussian Process functional effects were estimated using a low-rank Hilbert space approximation following methods described in Riutort-Mayol et al. (2023)."
if(object$algorithm %in% c('meanfield', 'fullrank')){
stan_text <- paste0(
stan_text,
" We used Stan's Automatic Differentiation Variational Inference algorithm",
" (Kucukelbir et al. 2017) for posterior approximation, specifically using ",
object$algorithm,
" algorithm to draw ",
object$model_output@sim$iter,
" samples from the approximate joint posterior."
)
citations <- append(
citations,
list("Riutort-Mayol, G, Bürkner, PC, Andersen, MR, Solin, A and Vehtari, A (2023). Practical Hilbert space approximate Bayesian Gaussian processes for probabilistic programming. Statistics and Computing 33, 1. https://doi.org/10.1007/s11222-022-10167-2")
list("Kucukelbir, A, Tran, D, Ranganath, R, Gelman, A, and Blei, DM (2017). Automatic Differentiation Variational Inference. Journal of Machine Learning Research 18 1-45.")
)
}

if(object$algorithm == c('laplace')){
stan_text <- paste0(
stan_text,
" We used Stan's Laplace approximation algorithm",
" to draw ",
object$model_output@sim$iter,
" samples from the approximate joint posterior."
)

}

if(object$algorithm == c('pathfinder')){
stan_text <- paste0(
stan_text,
" We used Stan's Pathfinder variational approximation algorithm (Zhang et al. 2022)",
" to draw ",
object$model_output@sim$iter,
" samples from the approximate joint posterior."
)

citations <- append(
citations,
list("Zhang, L, Carpenter, B, Gelman, A, and Vehtari, A (2022). Pathfinder: parallel Quasi-Newton variational inference. Journal of Machine Learning Research 23(306), 1–49. http://jmlr.org/papers/v23/21-0889.html.")
)
}
# Append texts
all_text <- paste0(mvgam_text, specials_text, stan_text)

# List of additional, possibly very useful references
other_citations <- vector(mode = 'list')
other_citations[[1]] <- "Arel-Bundock V (2024). marginaleffects: Predictions, Comparisons, Slopes, Marginal Means, and Hypothesis Tests. R package version 0.19.0.4, <https://marginaleffects.com/>."
other_citations[[1]] <- "Arel-Bundock V (2024). marginaleffects: Predictions, Comparisons, Slopes, Marginal Means, and Hypothesis Tests. R package version 0.19.0.4, https://marginaleffects.com/."
other_citations[[2]] <- "Gabry J, Simpson D, Vehtari A, Betancourt M, and Gelman A (2019). “Visualization in Bayesian workflow.” Journal of the Royal Statatistical Society A, 182, 389-402. doi:10.1111/rssa.12378."
other_citations[[3]] <- "Vehtari A, Gelman A, and Gabry J (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing, 27, 1413-1432. doi:10.1007/s11222-016-9696-4."
other_citations[[3]] <- "Vehtari A, Gelman A, and Gabry J (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing, 27, 1413-1432. doi:10.1007/s11222-016-9696-4."
other_citations[[4]] <- "Bürkner, PC, Gabry, J, and Vehtari, A. (2020). Approximate leave-future-out cross-validation for Bayesian time series models. Journal of Statistical Computation and Simulation, 90(14), 2499–2523. https://doi.org/10.1080/00949655.2020.1783262"
other_citations[[5]] <- "Vehtari A, Gelman A, Simpson D, Carpenter B, and Bürkner P (2021). “Rank-normalization, folding, and localization: An improved Rhat for assessing convergence of MCMC (with discussion).” Bayesian Analysis 16(2) 667-718. https://doi.org/10.1214/20-BA1221."

out <- structure(
list(
methods_text = all_text,
citations = citations,
other_citations = other_citations
),
class = 'how_to_cite'
methods_text = all_text,
citations = citations,
other_citations = other_citations
),
class = 'how_to_cite'
)

return(out)
Expand Down
35 changes: 35 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,41 @@ Both components contribute to forecast uncertainty. Diagnostics of the model can
plot(lynx_mvgam, type = 'residuals')
```

We can use the `how_to_cite()` function to generate a scaffold for describing the model and sampling details in scientific communications
```{r}
description <- how_to_cite(lynx_mvgam)
```

```{r, eval = FALSE}
description
```

```{r, echo=FALSE}
str_break = function(x, width = 90L) {
n = nchar(x)
if (n <= width) return(x)
n1 = seq(1L, n, by = width)
n2 = seq(width, n, by = width)
if (n %% width != 0) n2 = c(n2, n)
substring(x, n1, n2)
}
cat('Methods text skeleton\n')
cat(str_break(description$methods_text), sep = '\n')
```

```{r echo=FALSE}
cat('\nPrimary references\n')
for(i in seq_along(description$citations)){
message(description$citations[[i]])
message()
}
cat('\nOther useful references\n')
for(i in seq_along(description$other_citations)){
message(description$other_citations[[i]])
message()
}
```

## Extended observation families
`mvgam` was originally designed to analyse and forecast non-negative integer-valued data. These data are traditionally challenging to analyse with existing time-series analysis packages. But further development of `mvgam` has resulted in support for a growing number of observation families. Currently, the package can handle data for the following:

Expand Down
Loading

0 comments on commit 146d9ba

Please sign in to comment.