update vignettes, actually choose best and not worse values with mult…

…istart optimization, numeric time variable, add option for penalized likelihood
helske · Sep 27, 2024 · 7e7b0cd · 7e7b0cd
1 parent 8622f15
commit 7e7b0cd
Show file tree

Hide file tree

Showing 8 changed files with 108 additions and 30 deletions.
diff --git a/R/check_build_arguments.R b/R/check_build_arguments.R
@@ -62,6 +62,29 @@
       channel_names <- paste("Channel", seq_len(n_channels))
     }
     sequence_lengths <- do.call(pmax, lapply(x, TraMineR::seqlength))
+    times <- colnames(x[[1]])
+    na_times <- suppressWarnings(any(is.na(timenames <- as.numeric(times))))
+    if (na_times) {
+      na_times <- suppressWarnings(any(is.na(timenames <- as.numeric(sub('.', '', times)))))
+      if (na_times) {
+        warning_(
+          paste0(
+            "Time indices (column names) of sequences are not coarceable to ",
+            "numeric. Replacing them with integers."
+          )
+        )
+        timenames <- seq_len(ncol(x[[1]]))
+      }
+    }
+    stopifnot_(
+      identical(sort(timenames), timenames),
+      paste0(
+        "The numeric time indices based on column names of sequence object ", 
+        "are not numerically sorted. Please recode the column names.")
+    )
+    for (i in seq_len(length(x))) {
+      colnames(x[[i]]) <- timenames 
+    }
   } else {
     n_sequences <- nrow(x)
     length_of_sequences <- ncol(x)
@@ -73,6 +96,27 @@
       channel_names <- "Observations"
     }
     sequence_lengths <- TraMineR::seqlength(x)
+    times <- colnames(x)
+    na_times <- suppressWarnings(any(is.na(timenames <- as.numeric(times))))
+    if (na_times) {
+      na_times <- suppressWarnings(any(is.na(timenames <- as.numeric(sub('.', '', times)))))
+      if (na_times) {
+        warning_(
+          paste0(
+            "Time indices (column names) of sequences are not coarceable to ",
+            "numeric. Replacing them with integers."
+          )
+        )
+        timenames <- seq_len(ncol(x))
+      }
+    }
+    stopifnot_(
+      identical(sort(timenames), timenames),
+      paste0(
+        "The numeric time indices based on column names of sequence object ", 
+        "are not numerically sorted. Please recode the column names.")
+    )
+    colnames(x) <- timenames 
   }
   sequence_lengths <- as.integer(sequence_lengths)
   dim(sequence_lengths) <- length(sequence_lengths)
@@ -236,6 +280,13 @@
     !is.null(data[[time]]), 
     "Can't find time index variable {.var {time}} in {.arg data}."
   )
+  stopifnot_(
+    is.numeric(data[[time]]),
+    c(
+      "Time index variable {.arg {time}} must be of type {.cls numeric} or 
+      {.cls integer}."
+    )
+  )
   data <- data[order(data[[id]], data[[time]]), ]
   fill_time(data, id, time)
 }
diff --git a/R/estimate_mnhmm.R b/R/estimate_mnhmm.R
@@ -43,7 +43,7 @@ estimate_mnhmm <- function(
     transition_formula = ~1, emission_formula = ~1, cluster_formula = ~1,
     data = NULL, time = NULL, id = NULL, state_names = NULL, 
     channel_names = NULL, cluster_names = NULL, inits = "random", init_sd = 2, 
-    restarts = 0L, threads = 1L, store_data = TRUE, hessian = FALSE, ...) {
+    restarts = 0L, threads = 1L, store_data = TRUE, penalty = 0, hessian = FALSE, ...) {
 
   call <- match.call()
   model <- build_mnhmm(
@@ -58,7 +58,7 @@ estimate_mnhmm <- function(
   if (store_data) {
     model$data <- data
   }
-  out <- fit_mnhmm(model, inits, init_sd, restarts, threads, hessian, ...)
+  out <- fit_mnhmm(model, inits, init_sd, restarts, threads, penalty, hessian, ...)
 
   attr(out, "call") <- call
   out

diff --git a/R/estimate_nhmm.R b/R/estimate_nhmm.R
@@ -43,6 +43,8 @@
 #' is stored to the model object. For large datasets, this can be set to 
 #' `FALSE`, in which case you might need to pass the data separately to some 
 #' post-prosessing functions.
+#' @param penalty Penalty for penalized maximum likelihood. Default is `0` 
+#' i.e. no penalization as the penalty is of form `0.5 * sum(pars^2) * penalty`.
 #' @param hessian If `TRUE`, computes the Hessian of the model 
 #' coefficients using [numDeriv::jacobian]. Default is `FALSE`. Can also be 
 #' a list with components `method`, `side` and `method.args` to 
@@ -73,7 +75,7 @@ estimate_nhmm <- function(
     transition_formula = ~1, emission_formula = ~1, 
     data = NULL, time = NULL, id = NULL, state_names = NULL, channel_names = NULL, 
     inits = "random", init_sd = 2, restarts = 0L, threads = 1L, 
-    store_data = TRUE, hessian = FALSE, ...) {
+    store_data = TRUE, penalty = 0, hessian = FALSE, ...) {
 
   call <- match.call()
 
@@ -89,7 +91,7 @@ estimate_nhmm <- function(
   if (store_data) {
     model$data <- data
   }
-  out <- fit_nhmm(model, inits, init_sd, restarts, threads, hessian, ...)
+  out <- fit_nhmm(model, inits, init_sd, restarts, threads, penalty, hessian, ...)
   attr(out, "call") <- call
   out
 }
diff --git a/R/fit_mnhmm.R b/R/fit_mnhmm.R
@@ -1,7 +1,7 @@
 #' Estimate a Mixture Non-homogeneous Hidden Markov Model
 #'
 #' @noRd
-fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
+fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty, hessian, ...) {
   stopifnot_(
     checkmate::test_int(x = threads, lower = 1L), 
     "Argument {.arg threads} must be a single positive integer."
@@ -56,6 +56,10 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
   if (model$n_channels == 1L) {
     if (need_grad) {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(list(objective = Inf, gradient = rep(-Inf, length(pars))))
+        } 
+
         gamma_pi_raw <- create_gamma_pi_raw_mnhmm(pars[seq_len(n_i)], S, K_i, D)
         gamma_A_raw <- create_gamma_A_raw_mnhmm(pars[n_i + seq_len(n_s)], S, K_s, D)
         gamma_B_raw <- create_gamma_B_raw_mnhmm(
@@ -69,11 +73,14 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, 
           Ti
         )
-        list(objective = - out$loglik,
-             gradient = - unlist(out[-1]))
+        list(objective = - out$loglik + 0.5 * sum(pars^2) * penalty,
+             gradient = - unlist(out[-1]) + pars * penalty)
       }
     } else {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(Inf)
+        } 
         gamma_pi_raw <- create_gamma_pi_raw_mnhmm(pars[seq_len(n_i)], S, K_i, D)
         gamma_A_raw <- create_gamma_A_raw_mnhmm(pars[n_i + seq_len(n_s)], S, K_s, D)
         gamma_B_raw <- create_gamma_B_raw_mnhmm(
@@ -87,12 +94,15 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_omega_raw, X_d, obs
         )
 
-        - sum(apply(out[, T_, ], 2, logSumExp))
+        - sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty
       }
     }
   } else {
     if (need_grad) {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(list(objective = Inf, gradient = rep(-Inf, length(pars))))
+        }
         gamma_pi_raw <- create_gamma_pi_raw_mnhmm(pars[seq_len(n_i)], S, K_i, D)
         gamma_A_raw <- create_gamma_A_raw_mnhmm(
           pars[n_i + seq_len(n_s)], 
@@ -112,11 +122,14 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega,
           Ti
         )
-        list(objective = - out$loglik,
-             gradient = - unlist(out[-1]))
+        list(objective = - out$loglik + 0.5 * sum(pars^2) * penalty,
+             gradient = - unlist(out[-1]) + pars * penalty)
       }
     } else {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(Inf)
+        } 
         gamma_pi_raw <- create_gamma_pi_raw_mnhmm(pars[seq_len(n_i)], S, K_i, D)
         gamma_A_raw <- create_gamma_A_raw_mnhmm(
           pars[n_i + seq_len(n_s)], S, K_s, D
@@ -137,7 +150,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_omega_raw, X_d,
           obs, M)
 
-        - sum(apply(out[, T_, ], 2, logSumExp))
+        - sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty
       }
     }
   }
@@ -164,7 +177,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
     },
     future.seed = TRUE)
 
-    logliks <- unlist(lapply(out, "[[", "objective"))
+    logliks <- -unlist(lapply(out, "[[", "objective"))
     return_codes <- unlist(lapply(out, "[[", "status"))
     successful <- which(return_codes > 0)
     optimum <- successful[which.max(logliks[successful])]
@@ -222,12 +235,13 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
 
   model$estimation_results <- list(
     hessian = hessian,
-    loglik = out$objective, 
+    loglik = -out$objective, 
     return_code = out$status,
     message = out$message,
     iterations = out$iterations,
     logliks_of_restarts = if(restarts > 0L) logliks else NULL, 
-    return_codes_of_restarts = if(restarts > 0L) return_codes else NULL
+    return_codes_of_restarts = if(restarts > 0L) return_codes else NULL,
+    penalty = penalty
   )
   model
 }
diff --git a/R/fit_nhmm.R b/R/fit_nhmm.R
@@ -1,7 +1,7 @@
 #' Estimate a Non-homogeneous Hidden Markov Model
 #'
 #' @noRd
-fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
+fit_nhmm <- function(model, inits, init_sd, restarts, threads, penalty, hessian, ...) {
   stopifnot_(
     checkmate::test_int(x = threads, lower = 1L), 
     "Argument {.arg threads} must be a single positive integer."
@@ -52,6 +52,9 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
   if (model$n_channels == 1L) {
     if (need_grad) {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(list(objective = Inf, gradient = rep(-Inf, length(pars))))
+        }
         gamma_pi_raw <- create_gamma_pi_raw_nhmm(pars[seq_len(n_i)], S, K_i)
         gamma_A_raw <- create_gamma_A_raw_nhmm(pars[n_i + seq_len(n_s)], S, K_s)
         gamma_B_raw <- create_gamma_B_raw_nhmm(
@@ -61,11 +64,14 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs,
           iv_pi, iv_A, iv_B, tv_A, tv_B, Ti
         )
-        list(objective = - out$loglik,
-             gradient = - unlist(out[-1]))
+        list(objective = - out$loglik + 0.5 * sum(pars^2) * penalty,
+             gradient = - unlist(out[-1]) + pars * penalty)
       }
     } else {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(Inf)
+        } 
         gamma_pi_raw <- create_gamma_pi_raw_nhmm(pars[seq_len(n_i)], S, K_i)
         gamma_A_raw <- create_gamma_A_raw_nhmm(pars[n_i + seq_len(n_s)], S, K_s)
         gamma_B_raw <- create_gamma_B_raw_nhmm(
@@ -75,12 +81,15 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs
         )
 
-        - sum(apply(out[, T_, ], 2, logSumExp))
+        - sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty
       }
     }
   } else {
     if (need_grad) {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(list(objective = Inf, gradient = rep(-Inf, length(pars))))
+        }
         gamma_pi_raw <- create_gamma_pi_raw_nhmm(pars[seq_len(n_i)], S, K_i)
         gamma_A_raw <- create_gamma_A_raw_nhmm(pars[n_i + seq_len(n_s)], S, K_s)
         gamma_B_raw <- create_gamma_multichannel_B_raw_nhmm(
@@ -90,11 +99,14 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
           gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M,
           iv_pi, iv_A, iv_B, tv_A, tv_B, Ti
         )
-        list(objective = - out$loglik,
-             gradient = - unlist(out[-1]))
+        list(objective = - out$loglik + 0.5 * sum(pars^2) * penalty,
+             gradient = - unlist(out[-1]) + pars * penalty)
       }
     } else {
       objectivef <- function(pars) {
+        if (any(!is.finite(exp(pars)))) {
+          return(Inf)
+        }
         gamma_pi_raw <- create_gamma_pi_raw_nhmm(pars[seq_len(n_i)], S, K_i)
         gamma_A_raw <- create_gamma_A_raw_nhmm(pars[n_i + seq_len(n_s)], S, K_s)
         gamma_B_raw <- create_gamma_multichannel_B_raw_nhmm(
@@ -103,8 +115,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
         out <- forward_nhmm_multichannel(
           gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M
         )
-
-        - sum(apply(out[, T_, ], 2, logSumExp))
+        - sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty
       }
     }
   }
@@ -131,7 +142,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
     },
     future.seed = TRUE)
 
-    logliks <- unlist(lapply(out, "[[", "objective"))
+    logliks <- -unlist(lapply(out, "[[", "objective"))
     return_codes <- unlist(lapply(out, "[[", "status"))
     successful <- which(return_codes > 0)
     optimum <- successful[which.max(logliks[successful])]
@@ -182,12 +193,13 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
 
   model$estimation_results <- list(
     hessian = hessian,
-    loglik = out$objective, 
+    loglik = -out$objective, 
     return_code = out$status,
     message = out$message,
     iterations = out$iterations,
     logliks_of_restarts = if(restarts > 0L) logliks else NULL, 
-    return_codes_of_restarts = if(restarts > 0L) return_codes else NULL
+    return_codes_of_restarts = if(restarts > 0L) return_codes else NULL,
+    penalty = penalty
   )
   model
 }
diff --git a/R/simulate_nhmm.R b/R/simulate_nhmm.R
@@ -69,9 +69,9 @@ simulate_nhmm <- function(
     if (is.null(coefs$transition_probs)) coefs$transition_probs <- NULL
     if (is.null(coefs$emission_probs)) coefs$emission_probs <- NULL
   }
-  K_i <- dim(model$X_initial)[2]
-  K_s <- dim(model$X_transition)[3]
-  K_o <- dim(model$X_emission)[3]
+  K_i <- nrow(model$X_initial)
+  K_s <- nrow(model$X_transition)
+  K_o <- nrow(model$X_emission)
   model$coefficients <- create_initial_values(
     coefs, n_states, n_symbols, init_sd, K_i, K_s, K_o, 0, 0
   )

diff --git a/vignettes/seqHMM.Rnw b/vignettes/seqHMM.Rnw
@@ -474,7 +474,7 @@ p
 
 % Deprecated code: 
 
-<<gridplot1, fig.width=5.5, fig.height=3.5, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Showing state distribution plots for women and men in the \\code{biofam} data. Two figures were defined with the \\code{ssp} function and then combined into one figure with the \\code{gridplot} function.", fig.align='center', fig.keep='last', cache = FALSE, eval = FALSE, echo = FALSE>>=
+<<gridplot1old, fig.width=5.5, fig.height=3.5, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Showing state distribution plots for women and men in the \\code{biofam} data. Two figures were defined with the \\code{ssp} function and then combined into one figure with the \\code{gridplot} function.", fig.align='center', fig.keep='last', cache = FALSE, eval = FALSE, echo = FALSE>>=
 ssp_f <- ssp(
   list(
     marr_seq[biofam3c$covariates$sex == "woman", ],

diff --git a/vignettes/seqHMM_visualization.Rnw b/vignettes/seqHMM_visualization.Rnw
@@ -57,7 +57,6 @@ left_seq <- seqdef(biofam3c$left, start = 15, alphabet = c(
 @
 
 <<seqplotSingle, fig.width=6.5, fig.height=3, dev.args=list(pointsize=10), echo=FALSE, fig.cap="Sequence index plot (left) and state distribution plot (right) of annual family states for 100 individuals from the \\texttt{biofam} data.", fig.align='center', fig.keep='last', message=FALSE>>=
-gridplot(list(seqIplot, seqdplot), ncol = 3, col.prop = c(0.35, 0.4, 0.25))
 library(patchwork)
 p1 <- stacked_sequence_plot(biofam_seq[1:100, ], type = "i", legend_position = "none")
 p2 <- stacked_sequence_plot(biofam_seq[1:100, ], type = "d", legend_position = "right")