enable other start values (#396)

* initial try * update docs * update start control * update docs * add internal object to hold start funs * update docs * update docs * only when singularity use nearPD * remove default start functions * update staring value section and update tests * update docs * Update R/fit.R Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * address comments * [skip actions] Roxygen Man Pages Auto Update * Update tests/testthat/test-fit.R Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * Update vignettes/subsections/_intro-customizations.Rmd Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * Update vignettes/subsections/_intro-customizations.Rmd Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * Update vignettes/subsections/_intro-customizations.Rmd Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * Update tests/testthat/test-fit.R Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * add error info and update vignettes * fix typo * update docs and tests * update docs * update doc and check * Update vignettes/subsections/_intro-customizations.Rmd Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * adapt examples * use data instead of full_frame in emp (#401) * use data instead of full_frame in emp * add note for emp_start * Apply suggestions from code review Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> * [skip actions] Restyle files * update example * typo update --------- Co-authored-by: Daniel Sabanes Bove <danielinteractive@users.noreply.github.com> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
openpharma · Jan 4, 2024 · c343a9b · c343a9b
1 parent a401521
commit c343a9b
Show file tree

Hide file tree

Showing 15 changed files with 1,093 additions and 608 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -33,12 +33,14 @@ export(cov_struct)
 export(cov_types)
 export(df_1d)
 export(df_md)
+export(emp_start)
 export(fit_mmrm)
 export(fit_single_optimizer)
 export(glance)
 export(mmrm)
 export(mmrm_control)
 export(refit_multiple_optimizers)
+export(std_start)
 export(tidy)
 import(checkmate)
 importFrom(Matrix,.bdiag)

diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,8 @@
 ### New Features
 
 - `Anova` is implemented for `mmrm` models and available upon loading the `car` package. It supports type II and III hypothesis testing.
+- The argument `start` for `mmrm_control()` is updated to allow function/numeric input for better
+choices of initial values.
 
 ### Miscellaneous
 

diff --git a/R/fit.R b/R/fit.R
@@ -201,8 +201,8 @@ refit_multiple_optimizers <- function(fit,
 #' @param n_cores (`count`)\cr number of cores to be used.
 #' @param method (`string`)\cr adjustment method for degrees of freedom.
 #' @param vcov (`string`)\cr coefficients covariance matrix adjustment method.
-#' @param start (`numeric` or `NULL`)\cr optional start values for variance
-#'   parameters.
+#' @param start (`NULL`, `numeric` or `function`)\cr optional start values for variance
+#'   parameters. See details for more information.
 #' @param accept_singular (`flag`)\cr whether singular design matrices are reduced
 #'   to full rank automatically and additional coefficient estimates will be missing.
 #' @param optimizers (`list`)\cr optimizer specification, created with [h_get_optimizers()].
@@ -240,6 +240,12 @@ refit_multiple_optimizers <- function(fit,
 #'   compared to SAS; Use "Kenward-Roger-Linear" for `vcov` instead for better matching
 #'   of the SAS results.
 #'
+#' - The argument `start` is used to facilitate the choice of initial values for fitting the model.
+#'   If `function` is provided, make sure its parameter is a valid element of `mmrm_tmb_data`
+#'   or `mmrm_tmb_formula_parts` and it returns a numeric vector.
+#'   By default or if `NULL` is provided, `std_start` will be used.
+#'   Other implemented methods include `emp_start`.
+#'
 #' @return List of class `mmrm_control` with the control parameters.
 #' @export
 #'
@@ -251,14 +257,21 @@ refit_multiple_optimizers <- function(fit,
 mmrm_control <- function(n_cores = 1L,
                          method = c("Satterthwaite", "Kenward-Roger", "Residual", "Between-Within"),
                          vcov = NULL,
-                         start = NULL,
+                         start = std_start,
                          accept_singular = TRUE,
                          drop_visit_levels = TRUE,
                          ...,
                          optimizers = h_get_optimizers(...)) {
   assert_count(n_cores, positive = TRUE)
   assert_character(method)
-  assert_numeric(start, null.ok = TRUE)
+  if (is.null(start)) {
+    start <- std_start
+  }
+  assert(
+    check_function(start, args = "..."),
+    check_numeric(start, null.ok = FALSE),
+    combine = "or"
+  )
   assert_flag(accept_singular)
   assert_flag(drop_visit_levels)
   assert_list(optimizers, names = "unique", types = c("function", "partial"))

diff --git a/R/tmb.R b/R/tmb.R
@@ -258,25 +258,17 @@ h_mmrm_tmb_parameters <- function(formula_parts,
   assert_class(tmb_data, "mmrm_tmb_data")
 
   m <- tmb_data$n_visits
-  start_value <- switch(formula_parts$cov_type,
-    us = rep(0, m * (m + 1) / 2),
-    toep = rep(0, m),
-    toeph = rep(0, 2 * m - 1),
-    ar1 = c(0, 0.5),
-    ar1h = c(rep(0, m), 0.5),
-    ad = rep(0, m),
-    adh = rep(0, 2 * m - 1),
-    cs = rep(0, 2),
-    csh = rep(0, m + 1),
-    sp_exp = rep(0, 2)
-  )
-  theta_dim <- length(start_value) * n_groups
-  if (!is.null(start)) {
-    assert_numeric(start, len = theta_dim, any.missing = FALSE, finite = TRUE)
+  start_value0 <- std_start(formula_parts$cov_type, m, n_groups)
+  theta_dim <- length(start_value0)
+  start_values <- if (is.null(start)) {
+    start_value0
+  } else if (test_function(start)) {
+    do.call(start, utils::modifyList(formula_parts, tmb_data))
   } else {
-    start <- rep(start_value, n_groups)
+    start
   }
-  list(theta = start)
+  assert_numeric(start_values, len = theta_dim, any.missing = FALSE, finite = TRUE)
+  list(theta = start_values)
 }
 
 #' Asserting Sane Start Values for `TMB` Fit

diff --git a/R/utils.R b/R/utils.R
@@ -348,6 +348,119 @@ h_valid_formula <- function(formula) {
   }
 }
 
+#' Standard Starting Value
+#'
+#' @description Obtain standard start values.
+#'
+#' @param cov_type (`string`)\cr name of the covariance structure.
+#' @param n_visits (`int`)\cr number of visits.
+#' @param n_groups (`int`)\cr number of groups.
+#' @param ... not used.
+#'
+#' @details
+#' `std_start` will try to provide variance parameter from identity matrix.
+#' However, for `ar1` and `ar1h` the corresponding values are not ideal because the
+#' \eqn{\rho} is usually a positive number thus using 0 as starting value can lead to
+#' incorrect optimization result, and we use 0.5 as the initial value of \eqn{\rho}.
+#'
+#' @return A numeric vector of starting values.
+#'
+#' @export
+std_start <- function(cov_type, n_visits, n_groups, ...) {
+  assert_string(cov_type)
+  assert_subset(cov_type, cov_types(c("abbr", "habbr")))
+  assert_int(n_visits, lower = 1L)
+  assert_int(n_groups, lower = 1L)
+  start_value <- switch(cov_type,
+    us = rep(0, n_visits * (n_visits + 1) / 2),
+    toep = rep(0, n_visits),
+    toeph = rep(0, 2 * n_visits - 1),
+    ar1 = c(0, 0.5),
+    ar1h = c(rep(0, n_visits), 0.5),
+    ad = rep(0, n_visits),
+    adh = rep(0, 2 * n_visits - 1),
+    cs = rep(0, 2),
+    csh = rep(0, n_visits + 1),
+    sp_exp = rep(0, 2)
+  )
+  rep(start_value, n_groups)
+}
+
+#' Empirical Starting Value
+#'
+#' @description Obtain empirical start value for unstructured covariance
+#'
+#' @param data (`data.frame`)\cr data used for model fitting.
+#' @param model_formula (`formula`)\cr the formula in mmrm model without covariance structure part.
+#' @param visit_var (`string`)\cr visit variable.
+#' @param subject_var (`string`)\cr subject id variable.
+#' @param subject_groups (`factor`)\cr subject group assignment.
+#' @param ... not used.
+#'
+#' @details
+#' This `emp_start` only works for unstructured covariance structure.
+#' It uses linear regression to first obtain the coefficients and use the residuals
+#' to obtain the empirical variance-covariance, and it is then used to obtain the
+#' starting values.
+#'
+#' @note `data` is used instead of `full_frame` because `full_frame` is already
+#' transformed if model contains transformations, e.g. `log(FEV1) ~ exp(FEV1_BL)` will
+#' drop `FEV1` and `FEV1_BL` but add `log(FEV1)` and `exp(FEV1_BL)` in `full_frame`.
+#'
+#' @return A numeric vector of starting values.
+#'
+#' @export
+emp_start <- function(data, model_formula, visit_var, subject_var, subject_groups, ...) {
+  assert_formula(model_formula)
+  assert_data_frame(data)
+  assert_subset(all.vars(model_formula), colnames(data))
+  assert_string(visit_var)
+  assert_string(subject_var)
+  assert_factor(data[[visit_var]])
+  n_visits <- length(levels(data[[visit_var]]))
+  assert_factor(data[[subject_var]])
+  subjects <- droplevels(data[[subject_var]])
+  n_subjects <- length(levels(subjects))
+  fit <- lm(formula = model_formula, data = data)
+  res <- rep(NA, n_subjects * n_visits)
+  res[
+    n_visits * as.integer(subjects) - n_visits + as.integer(data[[visit_var]])
+  ] <- residuals(fit)
+  res_mat <- matrix(res, ncol = n_visits, nrow = n_subjects, byrow = TRUE)
+  emp_covs <- lapply(
+    unname(split(seq_len(n_subjects), subject_groups)),
+    function(x) {
+      cov(res_mat[x, , drop = FALSE], use = "pairwise.complete.obs")
+    }
+  )
+  unlist(lapply(emp_covs, h_get_theta_from_cov))
+}
+#' Obtain Theta from Covariance Matrix
+#'
+#' @description Obtain unstructured theta from covariance matrix.
+#'
+#' @param covariance (`matrix`) of covariance matrix values.
+#'
+#' @details
+#' If the covariance matrix has `NA` in some of the elements, they will be replaced by
+#' 0 (non-diagonal) and 1 (diagonal). This ensures that the matrix is positive definite.
+#'
+#' @keywords internal
+h_get_theta_from_cov <- function(covariance) {
+  assert_matrix(covariance, mode = "numeric", ncols = nrow(covariance))
+  covariance[is.na(covariance)] <- 0
+  diag(covariance)[diag(covariance) == 0] <- 1
+  # empirical is not always positive definite in some special cases of numeric singularity.
+  qr_res <- qr(covariance)
+  if (qr_res$rank < ncol(covariance)) {
+    covariance <- Matrix::nearPD(covariance)$mat
+  }
+  emp_chol <- t(chol(covariance))
+  mat <- t(solve(diag(diag(emp_chol)), emp_chol))
+  ret <- c(log(diag(emp_chol)), mat[upper.tri(mat)])
+  unname(ret)
+}
+
 #' Register S3 Method
 #' Register S3 method to a generic.
 #'

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -68,6 +68,10 @@ reference:
       - df_1d
       - df_md
       - component
+  - title: Start Values
+    contents:
+      - std_start
+      - emp_start
   - title: Methods
     contents:
       - mmrm_methods

diff --git a/man/emp_start.Rd b/man/emp_start.Rd
diff --git a/man/h_get_theta_from_cov.Rd b/man/h_get_theta_from_cov.Rd
diff --git a/man/mmrm_control.Rd b/man/mmrm_control.Rd
diff --git a/man/std_start.Rd b/man/std_start.Rd