From eb860b5487d405e078a03cc1087b32c262fe6147 Mon Sep 17 00:00:00 2001 From: helske Date: Mon, 10 Aug 2020 15:13:14 +0300 Subject: [PATCH] added scalability example, now to CRAN --- DESCRIPTION | 4 ++-- R/walker.R | 24 ++++++++++++++++++++++++ inst/CITATION | 21 +++++++++++++++------ vignettes/walker.Rmd | 2 +- 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 67daad2..a962934 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,14 +2,14 @@ Package: walker Type: Package Title: Bayesian Generalized Linear Models with Time-Varying Coefficients Version: 0.4.1 -Date: 2020-05-19 +Date: 2020-08-10 Description: Bayesian generalized linear models with time-varying coefficients. Gaussian, Poisson, and binomial observations are supported. The Markov chain Monte Carlo computations are done using Hamiltonian Monte Carlo provided by Stan, using a state space representation of the model in order to marginalise over the coefficients for efficient sampling. For non-Gaussian models, the package uses the importance sampling type estimators based on - approximate marginal MCMC as in Vihola, Helske, Franks (2017, ). + approximate marginal MCMC as in Vihola, Helske, Franks (2020, ). Authors@R: person(given = "Jouni", family = "Helske", diff --git a/R/walker.R b/R/walker.R index 8776f9a..bd5e7ed 100644 --- a/R/walker.R +++ b/R/walker.R @@ -93,7 +93,31 @@ #' pred <- predict(fit, newdata) #' plot_predict(pred) #' } +#' \dontrun{ +#' # example on scalability +#' set.seed(1) +#' n <- 2^12 +#' beta1 <- cumsum(c(0.5, rnorm(n - 1, 0, sd = 0.05))) +#' beta2 <- cumsum(c(-1, rnorm(n - 1, 0, sd = 0.15))) +#' x1 <- rnorm(n, mean = 2) +#' x2 <- cos(1:n) +#' rw <- cumsum(rnorm(n, 0, 0.5)) +#' signal <- rw + beta1 * x1 + beta2 * x2 +#' y <- rnorm(n, signal, 0.5) #' +#' d <- data.frame(y, x1, x2) +#' +#' n <- 2^(6:12) +#' times <- numeric(length(n)) +#' for(i in seq_along(n)) { +#' times[i] <- sum(get_elapsed_time( +#' walker(y ~ 0 + rw1(~ x1 + x2, +#' beta = c(0, 10), sigma = c(0, 10)), +#' sigma_y = c(0, 10), data = d[1:n[i],], +#' chains = 1, seed = 1, refresh = 0)$stanfit)) +#' } +#' plot(log2(n), log2(times)) +#'} walker <- function(formula, data, sigma_y_prior, beta, init, chains, return_x_reg = FALSE, gamma_y = NULL, ...) { diff --git a/inst/CITATION b/inst/CITATION index 6eec837..84ef21a 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,11 +1,10 @@ c( bibentry( - bibtype = "Manual", - title = "{walker}: Bayesian Generalized Linear Models with Time-Varying Coefficients", + bibtype = "Article", author = "Jouni Helske", - year = sub("-.*", "", meta$Date), - note = sprintf("R package version %s", meta$Version), - url = "http://github.com/helske/walker", + title = " Efficient Bayesian generalized linear models with time-varying coefficients: The walker package in {R}", + journal = "Submitted", + year = "2020", key = "walker" ), bibentry( @@ -15,7 +14,17 @@ c( journal = "Ar{X}iv e-prints", eprint = "1609.02541", primaryClass = "stat.CO", - year = "2020", + year = "2017", key = "ISMCMC" + ), + bibentry( + bibtype = "Manual", + title = "{walker}: Bayesian Generalized Linear Models with Time-Varying Coefficients", + author = "Jouni Helske", + year = sub("-.*", "", meta$Date), + note = sprintf("R package version %s", meta$Version), + url = "http://github.com/helske/walker", + key = "package" ) + ) diff --git a/vignettes/walker.Rmd b/vignettes/walker.Rmd index 3c20b40..f241154 100644 --- a/vignettes/walker.Rmd +++ b/vignettes/walker.Rmd @@ -156,7 +156,7 @@ With naive implementation we get smaller effective sample sizes and much higher In this vignette we illustrated the benefits of marginalisation in the context of time-varying regression models. The underlying idea is not new; this approach is typical in classic Metropolis-type algorithms for linear-Gaussian state space models where the marginal likelihood $p(y | \theta)$ (where $\theta$ denotes the hyperparameters i.e. not the latents states such as $\beta$'s in current context) is used in the computation of the acceptance probability. Here we rely on readily available Hamiltonian Monte Carlo based `Stan` software, thus allowing us to enjoy the benefits of diverse tools of the `Stan` community. -The motivation behind the `walker` was to test the efficiency of importance sampling type weighting method of @vihola-helske-franks in a dynamic generalized linear model setting within the HMC context. Although some of our other preliminary results have suggested that naive combination of the HMC with IS weighting can provide rather limited computational gains, in case of dynamic GLMs so called global approximation technique [@vihola-helske-franks] provides fast and robust alternative to standard HMC approach of `Stan`. +The motivation behind the `walker` was to test the efficiency of importance sampling type weighting method of @vihola-helske-franks in a dynamic generalized linear model setting within the HMC context. Although some of our other preliminary results have suggested that naive combination of the HMC with IS weighting can provide rather limited computational gains, in case of GLMs with time-varying coefficients so called global approximation technique [@vihola-helske-franks] provides fast and robust alternative to standard HMC approach of `Stan`. ## Acknowledgements