diff --git a/.github/workflows/check_on_different_r_os.yml b/.github/workflows/check_on_different_r_os.yml index 736abd9..31901aa 100644 --- a/.github/workflows/check_on_different_r_os.yml +++ b/.github/workflows/check_on_different_r_os.yml @@ -64,6 +64,7 @@ jobs: Rscript -e "remotes::install_github('r-hub/sysreqs')" sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") sudo -s eval "$sysreqs" + sudo apt-get install -y libcurl4-openssl-dev - name: Install dependencies run: | diff --git a/.zenodo.json b/.zenodo.json index 44f827b..7916a3e 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,7 +1,7 @@ { "title": "multimput: Using Multiple Imputation to Address Missing Data", - "version": "0.2.10", - "description": "Accompaning package for the paper: \"Working with population totals in the presence of with missing data\".", + "version": "0.2.11", + "description": "Accompanying package for the paper: \"Working with population totals in the presence of with missing data\".", "creators": [ { "name": "Onkelinx, Thierry", @@ -34,5 +34,6 @@ "type": "ContactPerson", "orcid": "https://orcid.org/0000-0001-8804-4216" } - ] + ], + "language": "eng" } diff --git a/CITATION.cff b/CITATION.cff index 086ae97..fdf15c7 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -17,11 +17,13 @@ contact: - email: info@inbo.be name: Research Institute for Nature and Forest title: 'multimput: Using Multiple Imputation to Address Missing Data' -version: 0.2.10 -abstract: 'Accompaning package for the paper: "Working with population totals in the - presence of with missing data".' +version: 0.2.11 +abstract: 'Accompanying package for the paper: "Working with population totals in + the presence of with missing data".' 
license: GPL-3.0 type: software doi: 10.5281/zenodo.598331 repository-code: https://github.com/inbo/multimput -identifiers: [] +identifiers: +- type: url + value: https://inbo.github.io/multimput/ diff --git a/DESCRIPTION b/DESCRIPTION index 6141814..4f2bb68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: multimput Title: Using Multiple Imputation to Address Missing Data -Version: 0.2.10 +Version: 0.2.11 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216")), @@ -11,11 +11,11 @@ Authors@R: c( comment = c(ORCID = "0000-0002-6894-9402")), person("Research Institute for Nature and Forest", , , "info@inbo.be", role = c("cph", "fnd")) ) -Description: Accompaning package for the paper: "Working with population +Description: Accompanying package for the paper: "Working with population totals in the presence of with missing data". License: GPL-3 URL: https://doi.org/10.5281/zenodo.598331, - https://github.com/inbo/multimput + https://github.com/inbo/multimput, https://inbo.github.io/multimput/ BugReports: https://github.com/inbo/multimput/issues Depends: R (>= 3.0.0) @@ -41,8 +41,9 @@ Suggests: testthat VignetteBuilder: knitr -Additional_repositories: https://inla.r-inla-download.org/R/testing +Additional_repositories: https://inla.r-inla-download.org/R/stable Encoding: UTF-8 +Language: en-GB LazyData: TRUE Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index a7a4b26..e2849a1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,7 +11,7 @@ export(missing_current_count) export(missing_observed) export(missing_volunteer) exportClasses(aggregatedImputed) -exportClasses(inla) +exportClasses(maybeInla) exportClasses(rawImputed) exportMethods(aggregate_impute) exportMethods(impute) @@ -39,9 +39,12 @@ importFrom(dplyr,summarise_at) importFrom(dplyr,transmute) importFrom(dplyr,ungroup) importFrom(dplyr,vars) 
+importFrom(methods,getClassDef) importFrom(methods,new) importFrom(methods,setClass) +importFrom(methods,setClassUnion) importFrom(methods,setGeneric) +importFrom(methods,setIs) importFrom(methods,setMethod) importFrom(methods,setOldClass) importFrom(methods,setValidity) diff --git a/NEWS.md b/NEWS.md index eee334a..3d1e5f9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# multimput 0.2.11 + +* Vignette runs without INLA. + Required to make the package build on https://inbo.r-universe.dev + # multimput 0.2.10 * Use checklist infrastructure. diff --git a/R/aggregate_impute.R b/R/aggregate_impute.R index f659761..ce341e1 100644 --- a/R/aggregate_impute.R +++ b/R/aggregate_impute.R @@ -1,5 +1,5 @@ #' Aggregate an imputed dataset -#' @param object A rawImputed object. +#' @param object A `rawImputed` object. #' @param grouping A vector of variables names to group the aggregation on. #' @param fun The function to aggregate. #' @param filter diff --git a/R/aggregated_imputed_class.R b/R/aggregated_imputed_class.R index 0f79179..e68765b 100644 --- a/R/aggregated_imputed_class.R +++ b/R/aggregated_imputed_class.R @@ -1,9 +1,9 @@ -#' The aggregatedImputed class +#' The `aggregatedImputed` class #' Holds an aggregated imputation data set #' @section Slots: #' \describe{ #' \item{`Covariate`}{A data.frame with the covariates.} -#' \item{`Imputation`}{A matrix wih aggregated imputed values.} +#' \item{`Imputation`}{A matrix with aggregated imputed values.} #' } #' @name aggregatedImputed-class #' @rdname aggregatedImputed-class diff --git a/R/generate_data.R b/R/generate_data.R index 5723dda..fa383d1 100644 --- a/R/generate_data.R +++ b/R/generate_data.R @@ -1,6 +1,6 @@ #' Generate simulated data #' -#' Generate data for a regural monitoring design. +#' Generate data for a regular monitoring design. #' The counts follow a negative binomial distribution with given size parameters #' and the true mean mu depending on a year, period and site effect. 
#' All effects are independent from each other and have, on the log-scale, a @@ -15,7 +15,7 @@ #' Defaults to `FALSE`. #' @param site_factor Convert site to a factor. #' Defaults to `FALSE`. -#' @param trend The longterm linear trend on the log-scale. +#' @param trend The long-term linear trend on the log-scale. #' @param sd_rw_year The standard deviation of the year effects on the #' log-scale. #' @param amplitude_period The amplitude of the periodic effect on the diff --git a/R/import_s3_classes.R b/R/import_s3_classes.R index ff41646..3876a56 100644 --- a/R/import_s3_classes.R +++ b/R/import_s3_classes.R @@ -1,8 +1,16 @@ -#' inla -#' -#' the inla class is defined in the INLA package -#' @name inla-class -#' @seealso \link[INLA]{inla} #' @importFrom methods setOldClass -#' @exportClass inla setOldClass("inla") + +#' The `maybeInla` class +#' +#' A superclass holding either `NULL` or an object of the `inla` class. +#' @importFrom methods setClassUnion +#' @exportClass maybeInla +setClassUnion("maybeInla", "NULL") + +#' @importFrom methods getClassDef setIs +.onLoad <- function(...) { + if (requireNamespace("INLA", quietly = TRUE)) { + setIs("inla", "maybeInla", classDef = getClassDef("inla", package = "INLA")) + } +} diff --git a/R/impute_inla.R b/R/impute_inla.R index 86c96ed..2a93e94 100644 --- a/R/impute_inla.R +++ b/R/impute_inla.R @@ -23,11 +23,12 @@ #' using `B:0` threads. 
setMethod( f = "impute", - signature = signature(model = "inla"), + signature = signature(model = "maybeInla"), definition = function( model, ..., seed = 0L, num_threads = NULL, parallel_configs = TRUE, n_imp = 19 ) { + assert_that(!is.null(model), msg = "model should be an inla object") check_old_names(..., old_names = c(n_imp = "n.imp")) assert_that(is.count(n_imp)) assert_that( @@ -85,7 +86,7 @@ setMethod( .y = hyperpar[[grep("size for the nbinomial", colnames(hyperpar))]] ), poisson = map_dfr( - .x = latent, .f = rpois, n = length(missing_obs) + .x = latent, .f = ~rpois(n = length(missing_obs), lambda = exp(.x)) ), stop( "Imputations from the '", model$.args$family, "' family not yet defined. diff --git a/R/raw_imputed_class.R b/R/raw_imputed_class.R index 31ba529..397f2c2 100644 --- a/R/raw_imputed_class.R +++ b/R/raw_imputed_class.R @@ -1,4 +1,4 @@ -#' The rawimputed class +#' The `rawImputed` class #' Holds a dataset and imputed values #' @section Slots: #' \describe{ @@ -6,7 +6,7 @@ #' \item{`Response`}{A character holding the name of the response variable.} #' \item{`Minimum`}{An optional character holding the name of the variable #' with the minimum.} -#' \item{`Imputation`}{A matrix wih imputed values.} +#' \item{`Imputation`}{A matrix with imputed values.} #' } #' @name rawImputed-class #' @rdname rawImputed-class diff --git a/README.md b/README.md index eefb439..fc6f94c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) -[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html#maturing-1) +[![Life cycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html#maturing-1) 
[![License](http://img.shields.io/badge/license-GPL--3-blue.svg?style=flat)](http://www.gnu.org/licenses/gpl-3.0.html) [![Release](https://img.shields.io/github/release/qubyte/rubidium.svg)](https://github.com/inbo/multimput/releases) [![check package](https://github.com/inbo/multimput/actions/workflows/check_on_branch.yml/badge.svg)](https://github.com/inbo/multimput/actions/workflows/check_on_branch.yml) @@ -15,7 +15,7 @@ ## Rationale The `multimput` package was originally intended to provide the data and code to replicate the results of Onkelinx, Devos, and Quataert (2016). -This paper is freely available at . +This paper is freely available at http://dx.doi.org/10.1007/s10336-016-1404-9. The functions were all rewritten to make them more user-friendly and more generic. In order to make the package more compact, we removed the original code and data starting for version 0.2.6. However both the original code and data remain available in [the older releases](https://github.com/inbo/multimput/releases). @@ -30,13 +30,13 @@ This package requires the `INLA` package. You need to install it with `install.packages("INLA", repos = "https://inla.r-inla-download.org/R/stable")`. If this fails you can use `remotes::install_github("inbo/INLA")`. Note that the latter is just a read-only mirror which is infrequently updated. -Hence installing `INLA` from is highly recommended. +Hence installing `INLA` from https://inla.r-inla-download.org is highly recommended. Once `INLA` is installed, you can install `multimput` using the `remotes` package: `remotes::install_github("inbo/multimput", build_vignettes = TRUE)`). To view the vignette use `vignette("Impute", package = "multimput")`. The vignette is also available at https://inbo.github.io/multimput/articles/impute.html -A docker image with all the required dependencies is available from . +A docker image with all the required dependencies is available from https://hub.docker.com/r/inbobmk/multimput/. 
Use `docker pull inbobmk/multimput` to get it. ## Folder structure diff --git a/checklist.yml b/checklist.yml index c45c643..5fe4c89 100644 --- a/checklist.yml +++ b/checklist.yml @@ -13,7 +13,7 @@ allowed: Suggests or Enhances not in mainstream repositories: INLA Availability using Additional_repositories specification: - INLA yes https://inla.r-inla-download.org/R/testing + INLA yes https://inla.r-inla-download.org/R/stable citation_roles: - aut - cre diff --git a/inst/CITATION b/inst/CITATION index b0d3a29..8ea2fba 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `multimput` in publications please use:") # begin checklist entry citEntry( entry = "Manual", - title = "multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.10", + title = "multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.11", author = c(person(given = "Thierry", family = "Onkelinx"), person(given = "Koen", family = "Devos"), person(given = "Paul", family = "Quataert")), year = 2022, url = "https://doi.org/10.5281/zenodo.598331", - abstract = "Accompaning package for the paper: \"Working with population totals in the presence of with missing data\".", - textVersion = "Onkelinx, Thierry; Devos, Koen; Quataert, Paul (2022) multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.10. https://doi.org/10.5281/zenodo.598331, https://github.com/inbo/multimput", + abstract = "Accompanying package for the paper: \"Working with population totals in the presence of with missing data\".", + textVersion = "Onkelinx, Thierry; Devos, Koen; Quataert, Paul (2022) multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.11. 
https://doi.org/10.5281/zenodo.598331, https://github.com/inbo/multimput, https://inbo.github.io/multimput/", doi = "10.5281/zenodo.598331", ) # end checklist entry diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..24cd7ce --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,2 @@ +Codecov +INLA diff --git a/man/aggregate_impute.Rd b/man/aggregate_impute.Rd index 09a1e15..8a83482 100644 --- a/man/aggregate_impute.Rd +++ b/man/aggregate_impute.Rd @@ -17,7 +17,7 @@ aggregate_impute(object, grouping, fun, filter, join) \S4method{aggregate_impute}{aggregatedImputed}(object, grouping, fun, filter, join) } \arguments{ -\item{object}{A rawImputed object.} +\item{object}{A \code{rawImputed} object.} \item{grouping}{A vector of variables names to group the aggregation on.} diff --git a/man/aggregatedImputed-class.Rd b/man/aggregatedImputed-class.Rd index 3539905..5935252 100644 --- a/man/aggregatedImputed-class.Rd +++ b/man/aggregatedImputed-class.Rd @@ -3,17 +3,17 @@ \docType{class} \name{aggregatedImputed-class} \alias{aggregatedImputed-class} -\title{The aggregatedImputed class +\title{The \code{aggregatedImputed} class Holds an aggregated imputation data set} \description{ -The aggregatedImputed class +The \code{aggregatedImputed} class Holds an aggregated imputation data set } \section{Slots}{ \describe{ \item{\code{Covariate}}{A data.frame with the covariates.} -\item{\code{Imputation}}{A matrix wih aggregated imputed values.} +\item{\code{Imputation}}{A matrix with aggregated imputed values.} } } diff --git a/man/deprecated.Rd b/man/deprecated.Rd index 8678f29..4cd885c 100644 --- a/man/deprecated.Rd +++ b/man/deprecated.Rd @@ -81,7 +81,7 @@ Defaults to \code{FALSE}.} \item{site.factor}{Convert site to a factor. 
Defaults to \code{FALSE}.} -\item{trend}{The longterm linear trend on the log-scale.} +\item{trend}{The long-term linear trend on the log-scale.} \item{sd.rw.year}{The standard deviation of the year effects on the log-scale.} diff --git a/man/generate_data.Rd b/man/generate_data.Rd index 7f3818a..b82bed8 100644 --- a/man/generate_data.Rd +++ b/man/generate_data.Rd @@ -44,7 +44,7 @@ Defaults to \code{FALSE}.} \item{site_factor}{Convert site to a factor. Defaults to \code{FALSE}.} -\item{trend}{The longterm linear trend on the log-scale.} +\item{trend}{The long-term linear trend on the log-scale.} \item{sd_rw_year}{The standard deviation of the year effects on the log-scale.} @@ -85,7 +85,7 @@ scale. \code{Count} are the simulated counts. } \description{ -Generate data for a regural monitoring design. +Generate data for a regular monitoring design. The counts follow a negative binomial distribution with given size parameters and the true mean mu depending on a year, period and site effect. 
All effects are independent from each other and have, on the log-scale, a diff --git a/man/impute.Rd b/man/impute.Rd index 0aaf3d7..26125de 100644 --- a/man/impute.Rd +++ b/man/impute.Rd @@ -6,7 +6,7 @@ \alias{impute} \alias{impute,ANY-method} \alias{impute,glmerMod-method} -\alias{impute,inla-method} +\alias{impute,maybeInla-method} \alias{impute,lm-method} \title{Impute a dataset} \usage{ @@ -16,7 +16,7 @@ impute(model, ..., n_imp = 19) \S4method{impute}{glmerMod}(model, data, ..., n_imp) -\S4method{impute}{inla}( +\S4method{impute}{maybeInla}( model, ..., seed = 0L, diff --git a/man/inla-class.Rd b/man/inla-class.Rd deleted file mode 100644 index 7dcb9e0..0000000 --- a/man/inla-class.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/import_s3_classes.R -\name{inla-class} -\alias{inla-class} -\title{inla} -\description{ -the inla class is defined in the INLA package -} -\seealso{ -\link[INLA]{inla} -} diff --git a/man/maybeInla-class.Rd b/man/maybeInla-class.Rd new file mode 100644 index 0000000..cd82e02 --- /dev/null +++ b/man/maybeInla-class.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import_s3_classes.R +\docType{class} +\name{maybeInla-class} +\alias{maybeInla-class} +\title{The \code{maybeInla} class} +\description{ +A superclass holding either \code{NULL} or an object of the \code{inla} class. 
+} diff --git a/man/rawImputed-class.Rd b/man/rawImputed-class.Rd index 6626c68..fa83c9a 100644 --- a/man/rawImputed-class.Rd +++ b/man/rawImputed-class.Rd @@ -3,10 +3,10 @@ \docType{class} \name{rawImputed-class} \alias{rawImputed-class} -\title{The rawimputed class +\title{The \code{rawImputed} class Holds a dataset and imputed values} \description{ -The rawimputed class +The \code{rawImputed} class Holds a dataset and imputed values } \section{Slots}{ @@ -16,7 +16,7 @@ Holds a dataset and imputed values \item{\code{Response}}{A character holding the name of the response variable.} \item{\code{Minimum}}{An optional character holding the name of the variable with the minimum.} -\item{\code{Imputation}}{A matrix wih imputed values.} +\item{\code{Imputation}}{A matrix with imputed values.} } } diff --git a/multimput.Rproj b/multimput.Rproj index e4a061f..e06e6fe 100644 --- a/multimput.Rproj +++ b/multimput.Rproj @@ -17,6 +17,7 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes +PackageCleanBeforeInstall: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageCheckArgs: --as-cran PackageRoxygenize: rd,collate,namespace diff --git a/tests/testthat/test_ab_impute.R b/tests/testthat/test_ab_impute.R index 2460e71..736f98f 100644 --- a/tests/testthat/test_ab_impute.R +++ b/tests/testthat/test_ab_impute.R @@ -12,40 +12,22 @@ test_that("handles lm", { imputed <- impute(model, dataset), "rawImputed" ) - expect_identical( - ncol(imputed@Imputation), - 19L - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) - expect_identical( - imputed@Minimum, - "" - ) + expect_identical(ncol(imputed@Imputation), 19L) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) + expect_identical(imputed@Minimum, "") expect_is( imputed <- impute(model, dataset, n_imp = n_imp), "rawImputed" ) - expect_identical( - ncol(imputed@Imputation), - n_imp - ) - expect_identical( - nrow(imputed@Imputation), - 
sum(is.na(dataset$Count)) - ) + expect_identical(ncol(imputed@Imputation), n_imp) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) expect_is( imputed <- impute(model, dataset, minimum = "Bottom"), "rawImputed" ) - expect_identical( - imputed@Minimum, - "Bottom" - ) + expect_identical(imputed@Minimum, "Bottom") expect_error( impute(model, dataset, minimum = "Junk"), @@ -78,31 +60,16 @@ test_that("handles inla with gaussian distribution", { control.predictor = list(compute = TRUE, link = 1) ) expect_is(imputed <- impute(model, parallel_configs = FALSE), "rawImputed") - expect_identical( - ncol(imputed@Imputation), - 19L - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) - expect_identical( - imputed@Minimum, - "" - ) + expect_identical(ncol(imputed@Imputation), 19L) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) + expect_identical(imputed@Minimum, "") expect_is( imputed <- impute(model, dataset, n_imp = n_imp, parallel_configs = FALSE), "rawImputed" ) - expect_identical( - ncol(imputed@Imputation), - n_imp - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) + expect_identical(ncol(imputed@Imputation), n_imp) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) expect_is( imputed <- impute( @@ -110,10 +77,7 @@ test_that("handles inla with gaussian distribution", { ), "rawImputed" ) - expect_identical( - imputed@Minimum, - "Bottom" - ) + expect_identical(imputed@Minimum, "Bottom") expect_error( impute(model, dataset, minimum = "Junk", parallel_configs = FALSE), @@ -145,31 +109,16 @@ test_that("handles inla with negative binomial distribution", { control.predictor = list(compute = TRUE, link = 1) ) expect_is(imputed <- impute(model, parallel_configs = FALSE), "rawImputed") - expect_identical( - ncol(imputed@Imputation), - 19L - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) - expect_identical( - 
imputed@Minimum, - "" - ) + expect_identical(ncol(imputed@Imputation), 19L) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) + expect_identical(imputed@Minimum, "") expect_is( imputed <- impute(model, dataset, n_imp = n_imp, parallel_configs = FALSE), "rawImputed" ) - expect_identical( - ncol(imputed@Imputation), - n_imp - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) + expect_identical(ncol(imputed@Imputation), n_imp) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) expect_is( imputed <- impute( @@ -177,10 +126,7 @@ test_that("handles inla with negative binomial distribution", { ), "rawImputed" ) - expect_identical( - imputed@Minimum, - "Bottom" - ) + expect_identical(imputed@Minimum, "Bottom") expect_error( impute(model, dataset, minimum = "Junk", parallel_configs = FALSE), @@ -211,31 +157,16 @@ test_that("handles inla with poisson distribution", { control.predictor = list(compute = TRUE, link = 1) ) expect_is(imputed <- impute(model, parallel_configs = FALSE), "rawImputed") - expect_identical( - ncol(imputed@Imputation), - 19L - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) - expect_identical( - imputed@Minimum, - "" - ) + expect_identical(ncol(imputed@Imputation), 19L) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) + expect_identical(imputed@Minimum, "") expect_is( imputed <- impute(model, dataset, n_imp = n_imp, parallel_configs = FALSE), "rawImputed" ) - expect_identical( - ncol(imputed@Imputation), - n_imp - ) - expect_identical( - nrow(imputed@Imputation), - sum(is.na(dataset$Count)) - ) + expect_identical(ncol(imputed@Imputation), n_imp) + expect_identical(nrow(imputed@Imputation), sum(is.na(dataset$Count))) expect_is( imputed <- impute( diff --git a/vignettes/impute.Rmd b/vignettes/impute.Rmd index b02338b..10c2d58 100644 --- a/vignettes/impute.Rmd +++ b/vignettes/impute.Rmd @@ -9,7 +9,6 @@ vignette: > 
%\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} %\VignetteDepends{ggplot2} - %\VignetteDepends{INLA} %\VignetteDepends{mgcv} %\VignetteDepends{MASS} --- @@ -55,9 +54,9 @@ The standard error of a coefficient is the square root of a sum of two parts. The first part is the average of the squared standard error in all imputation sets. The second part is the variance of the coefficient among the imputation sets, multiplied by a correction factor $1 + \frac{1}{L}$. -$$\bar{\gamma}_a = \frac{\sum_{l = 1}^L{\gamma_a}_l}{L}$$ -$$\bar{\sigma}_a = \sqrt{\frac{\sum_{l = 1}^J {{\sigma_a^2}_l}}{L} + (1 + \frac{1}{L}) -\frac{\sum_{l = 1}^L({\gamma_a}_l - \bar{\gamma}_a) ^ 2}{L - 1}}$$ +$$\bar{\gamma}_a = \frac{\sum_{l = 1}^L{\gamma_a}_l}{L}$$ + +$$\bar{\sigma}_a = \sqrt{\frac{\sum_{l = 1}^L {{\sigma_a^2}_l}}{L} + (1 + \frac{1}{L}) \frac{\sum_{l = 1}^L({\gamma_a}_l - \bar{\gamma}_a) ^ 2}{L - 1}}$$ # The dataset @@ -71,9 +70,7 @@ Have a look at the help-file of `generateData()` for more details on the model.
library(multimput) set.seed(123) prop_missing <- 0.5 -dataset <- generate_data( - n_year = 10, n_period = 6, n_site = 50, n_run = 1 -) +dataset <- generate_data(n_year = 10, n_period = 6, n_site = 50, n_run = 1) dataset$Observed <- dataset$Count which_missing <- sample(nrow(dataset), size = nrow(dataset) * prop_missing) dataset$Observed[which_missing] <- NA @@ -85,31 +82,31 @@ str(dataset) Variables in dataset -Year +`Year` ~ The year of the observation as an integer -fYear +`fYear` ~ The year of the observation as a factor -Period +`Period` ~ The period of the observation as an integer -fPeriod +`fPeriod` ~ The period of the observation as a factor -Site +`Site` ~ The ID of the site as an integer -fSite +`fSite` ~ The ID of the site as a factor -Mu +`Mu` ~ The expected value of a negative binomial distribution -Count +`Count` ~ A realisation of a negative binomial distribution with expected value `Mu` -Observed +`Observed` ~ The `Count` variable with missing data ```{r plot_data} @@ -137,6 +134,17 @@ imp_glmm <- glmer( data = dataset, family = poisson ) +``` + +```{r inla, echo = FALSE, results='asis'} +inla_available <- requireNamespace("INLA", quietly = TRUE) +if (!inla_available) cat("**_This vignette requires the INLA package. +It was built on a system without the INLA package. +Please have a look at the vignette on the +[website](https://inbo.github.io/multimput/articles/impute.html)._**") +``` + +```{r imputation_model_inla, eval = inla_available} library(INLA) # a mixed model with Poisson distribution # fYear and fPeriod are the fixed effects @@ -194,6 +202,10 @@ The default is `n_imp = 19`.
```{r impute} raw_lm <- impute(imp_lm, data = dataset) raw_glmm <- impute(imp_glmm, data = dataset) +``` + + +```{r impute_inla, eval = inla_available} # setting `parallel_configs = FALSE` was required to pass R CMD Check # in practice you can use the default `parallel_configs = TRUE` raw_inla_p <- impute(imp_inla_p, parallel_configs = FALSE) @@ -210,35 +222,23 @@ The resulting object will only contain the imputed response and the grouping var The easiest way to have a variable like year both a continuous and factor is to add both `Year` and `fYear` to the `grouping`. ```{r aggregate} -aggr_lm <- aggregate_impute( - raw_lm, - grouping = c("fYear", "fPeriod"), - fun = sum -) +aggr_lm <- aggregate_impute(raw_lm, grouping = c("fYear", "fPeriod"), fun = sum) aggr_glmm <- aggregate_impute( - raw_glmm, - grouping = c("fYear", "fPeriod"), - fun = sum + raw_glmm, grouping = c("fYear", "fPeriod"), fun = sum ) +``` +```{r aggregate_inla, eval = inla_available} aggr_inla_p <- aggregate_impute( - raw_inla_p, - grouping = c("fYear", "fPeriod"), - fun = sum + raw_inla_p, grouping = c("fYear", "fPeriod"), fun = sum ) aggr_inla_nb <- aggregate_impute( - raw_inla_nb, - grouping = c("fYear", "fPeriod"), - fun = sum + raw_inla_nb, grouping = c("fYear", "fPeriod"), fun = sum ) aggr_better <- aggregate_impute( - raw_better, - grouping = c("fYear", "fPeriod"), - fun = sum + raw_better, grouping = c("fYear", "fPeriod"), fun = sum ) aggr_better_9 <- aggregate_impute( - raw_better_9, - grouping = c("fYear", "fPeriod"), - fun = sum + raw_better_9, grouping = c("fYear", "fPeriod"), fun = sum ) ``` @@ -252,7 +252,7 @@ So `model_fun = lm` in combination with `rhs = "0 + fYear + fPeriod"` is equival The tricky part of this function the `extractor` argument. This is a user defined function which must have an argument called `model`. -The function should return a `data.frame` or `matrix` with two columuns. +The function should return a `data.frame` or `matrix` with two columns. 
The first column hold the estimate of a parameter of the `model`, the second column their standard error. Each row represents a parameter. @@ -261,10 +261,7 @@ extractor_lm <- function(model) { summary(model)$coefficients[, c("Estimate", "Std. Error")] } model_impute( - aggr_lm, - model_fun = lm, - rhs = "0 + fYear + fPeriod", - extractor = extractor_lm + aggr_lm, model_fun = lm, rhs = "0 + fYear + fPeriod", extractor = extractor_lm ) ``` @@ -281,9 +278,7 @@ extractor_lm2 <- function(model) { cf[grepl("fYear", rownames(cf)), c("Estimate", "Std. Error")] } model_impute( - aggr_lm, - model_fun = lm, - rhs = "0 + fYear + fPeriod", + aggr_lm, model_fun = lm, rhs = "0 + fYear + fPeriod", extractor = extractor_lm2 ) ``` @@ -302,17 +297,11 @@ new_set <- expand.grid( ) extractor_lm3 <- function(model, newdata) { predictions <- predict(model, newdata = newdata, se.fit = TRUE) - cbind( - predictions$fit, - predictions$se.fit - ) + cbind(predictions$fit, predictions$se.fit) } model_gam <- model_impute( - aggr_lm, - model_fun = gam, - rhs = "s(Year) + fPeriod", - extractor = extractor_lm3, - extractor_args = list(newdata = new_set), + aggr_lm, model_fun = gam, rhs = "s(Year) + fPeriod", + extractor = extractor_lm3, extractor_args = list(newdata = new_set), mutate = list(Year = ~as.integer(levels(fYear))[fYear]) ) model_gam <- cbind(new_set, model_gam) @@ -340,9 +329,7 @@ Note that the estimated index for year 1 will be 0 and $log(100\%) = 0$. 
```{r glmnb_complete} library(MASS) aggr_complete <- aggregate( - dataset[, "Count", drop = FALSE], - dataset[, c("fYear", "fPeriod")], - FUN = sum + dataset[, "Count", drop = FALSE], dataset[, c("fYear", "fPeriod")], FUN = sum ) model_complete <- glm.nb(Count ~ 0 + fYear + fPeriod, data = aggr_complete) summary(model_complete) @@ -359,29 +346,26 @@ Now that we have a relevant model and extractor function, we can apply them to t ```{r model_glmnb} model_glmm <- model_impute( - object = aggr_glmm, - model_fun = glm.nb, - rhs = "0 + fYear + fPeriod", + object = aggr_glmm, model_fun = glm.nb, rhs = "0 + fYear + fPeriod", extractor = extractor_logindex ) +``` +```{r model_glmnb_inla, eval = inla_available} model_p <- model_impute( - object = aggr_inla_p, - model_fun = glm.nb, - rhs = "0 + fYear + fPeriod", + object = aggr_inla_p, model_fun = glm.nb, rhs = "0 + fYear + fPeriod", extractor = extractor_logindex ) model_nb <- model_impute( - object = aggr_inla_nb, - model_fun = glm.nb, - rhs = "0 + fYear + fPeriod", + object = aggr_inla_nb, model_fun = glm.nb, rhs = "0 + fYear + fPeriod", extractor = extractor_logindex ) model_better <- model_impute( - object = aggr_better, - model_fun = glm.nb, - rhs = "0 + fYear + fPeriod", + object = aggr_better, model_fun = glm.nb, rhs = "0 + fYear + fPeriod", extractor = extractor_logindex ) +``` + +```{r model_glmnb2} model_complete <- extractor_logindex(model_complete) colnames(model_complete) <- c("Estimate", "SE") library(dplyr) @@ -395,6 +379,8 @@ model_complete <- model_complete %>% covar <- data.frame( Year = sort(unique(dataset$Year)) ) +``` +```{r model_glmnb_inla2, eval = inla_available} # combine all results and add the Year parameters <- rbind( cbind(covar, model_glmm, Model = "glmm"), @@ -403,6 +389,12 @@ parameters <- rbind( cbind(covar, model_better, Model = "better"), cbind(covar, model_complete, Model = "complete") ) +``` +```{r model_glmnb_inla3, eval = !inla_available, echo = FALSE} +# combine all results and add 
the Year +parameters <- cbind(covar, model_glmm, Model = "glmm") +``` +```{r model_glmnb_plot} # convert estimate and confidence interval to the original scale parameters[, c("Estimate", "LCL", "UCL")] <- exp( parameters[, c("Estimate", "LCL", "UCL")] @@ -420,7 +412,7 @@ The example below does something similar. Two things are different: 1) instead of `glm.nb` we use `inla` to model the imputed totals. 2) we model the seasonal pattern as a random intercept instead of a fixed effect. -```{r model_inla} +```{r model_inla, eval = inla_available} extractor_inla <- function(model) { fe <- model$summary.fixed[, c("mean", "sd")] log_index <- fe[grepl("fYear", rownames(fe)), ] @@ -428,36 +420,22 @@ extractor_inla <- function(model) { log_index } model_p <- model_impute( - object = aggr_glmm, - model_fun = inla, - rhs = "0 + fYear + f(fPeriod, model = 'iid')", - model_args = list(family = "nbinomial"), - extractor = extractor_inla -) -model_p <- model_impute( - object = aggr_inla_p, - model_fun = inla, + object = aggr_inla_p, model_fun = inla, rhs = "0 + fYear + f(fPeriod, model = 'iid')", - model_args = list(family = "nbinomial"), - extractor = extractor_inla + model_args = list(family = "nbinomial"), extractor = extractor_inla ) model_nb <- model_impute( - object = aggr_inla_nb, - model_fun = inla, + object = aggr_inla_nb, model_fun = inla, rhs = "0 + fYear + f(fPeriod, model = 'iid')", - model_args = list(family = "nbinomial"), - extractor = extractor_inla + model_args = list(family = "nbinomial"), extractor = extractor_inla ) model_better <- model_impute( - object = aggr_better, - model_fun = inla, + object = aggr_better, model_fun = inla, rhs = "0 + fYear + f(fPeriod, model = 'iid')", - model_args = list(family = "nbinomial"), - extractor = extractor_inla + model_args = list(family = "nbinomial"), extractor = extractor_inla ) m_complete <- inla( - Count ~ 0 + fYear + f(fPeriod, model = "iid"), - data = aggr_complete, + Count ~ 0 + fYear + f(fPeriod, model = "iid"), data
= aggr_complete, family = "nbinomial" ) model_complete <- extractor_inla(m_complete)