Skip to content

Commit

Permalink
fixed various functions, added tests
Browse files Browse the repository at this point in the history
  • Loading branch information
helske committed Sep 1, 2024
1 parent 33d6c7e commit 5c7a386
Show file tree
Hide file tree
Showing 182 changed files with 19,763 additions and 17,045 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ vignettes/jss.bst
vignettes/jss.cls
^\.github$
^codecov\.yml$
^.covrignore
6 changes: 6 additions & 0 deletions .covrignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
R/mssplot-deprecated.R
R/plot.ssp-deprecated.R
R/ssp-deprecated.R
R/ssplot-deprecated.R
R/ssplotM-deprecated.R
R/SSplotter-deprecated.R
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: seqHMM
Title: Mixture Hidden Markov Models for Social Sequence Data and Other Multivariate, Multichannel Categorical Time Series
Version: 1.3.0
Date: 2024-08-01
Version: 2.0.0
Date: 2024-09-01
Authors@R:
c(person(given = "Jouni",
family = "Helske",
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,46 @@ S3method("state_names<-",hmm)
S3method("state_names<-",mhmm)
S3method(cluster_names,mhmm)
S3method(cluster_names,mnhmm)
S3method(coef,mnhmm)
S3method(coef,nhmm)
S3method(forward_backward,hmm)
S3method(forward_backward,mhmm)
S3method(forward_backward,mnhmm)
S3method(forward_backward,nhmm)
S3method(get_probs,mnhmm)
S3method(get_probs,nhmm)
S3method(hidden_paths,hmm)
S3method(hidden_paths,mhmm)
S3method(hidden_paths,mnhmm)
S3method(hidden_paths,nhmm)
S3method(logLik,hmm)
S3method(logLik,mhmm)
S3method(logLik,mnhmm)
S3method(logLik,nhmm)
S3method(plot,hmm)
S3method(plot,mhmm)
S3method(plot,ssp)
S3method(posterior_probs,hmm)
S3method(posterior_probs,mhmm)
S3method(posterior_probs,mnhmm)
S3method(posterior_probs,nhmm)
S3method(print,hmm)
S3method(print,mhmm)
S3method(print,mnhmm)
S3method(print,nhmm)
S3method(print,summary.mhmm)
S3method(state_names,hmm)
S3method(state_names,mhmm)
S3method(summary,mhmm)
S3method(summary,mnhmm)
S3method(summary,nhmm)
S3method(update,mnhmm)
S3method(update,nhmm)
S3method(vcov,mhmm)
export("cluster_names<-")
export("state_names<-")
export(alphabet)
export(average_marginal_effect)
export(build_hmm)
export(build_lcm)
export(build_mhmm)
Expand Down
16 changes: 8 additions & 8 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ get_A <- function(beta_raw, X, logspace) {
.Call(`_seqHMM_get_A`, beta_raw, X, logspace)
}

get_B <- function(beta_raw, X, logspace) {
.Call(`_seqHMM_get_B`, beta_raw, X, logspace)
get_B <- function(beta_raw, X, logspace, add_missing) {
.Call(`_seqHMM_get_B`, beta_raw, X, logspace, add_missing)
}

get_multichannel_B <- function(beta_raw, X, S, C, M, logspace) {
.Call(`_seqHMM_get_multichannel_B`, beta_raw, X, S, C, M, logspace)
get_multichannel_B <- function(beta_raw, X, S, C, M, logspace, add_missing) {
.Call(`_seqHMM_get_multichannel_B`, beta_raw, X, S, C, M, logspace, add_missing)
}

get_omega_i <- function(theta_raw, X, logspace) {
Expand All @@ -85,12 +85,12 @@ get_A_i <- function(beta_raw, X, logspace) {
.Call(`_seqHMM_get_A_i`, beta_raw, X, logspace)
}

get_B_i <- function(beta_raw, X, logspace) {
.Call(`_seqHMM_get_B_i`, beta_raw, X, logspace)
get_B_i <- function(beta_raw, X, logspace, add_missing) {
.Call(`_seqHMM_get_B_i`, beta_raw, X, logspace, add_missing)
}

get_multichannel_B_i <- function(beta_raw, X, S, C, M, logspace) {
.Call(`_seqHMM_get_multichannel_B_i`, beta_raw, X, S, C, M, logspace)
get_multichannel_B_i <- function(beta_raw, X, S, C, M, logspace, add_missing) {
.Call(`_seqHMM_get_multichannel_B_i`, beta_raw, X, S, C, M, logspace, add_missing)
}

logLikHMM <- function(transition, emission, init, obs, threads) {
Expand Down
137 changes: 137 additions & 0 deletions R/average_marginal_effect.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#' Average Marginal Effects of Covariates of Non-homogenous Hidden Markov Models
#'
#' @param model A Hidden Markov Model of class `nhmm` or `mnhmm`.
#' @param variable Name of the variable of interest.
#' @param values Vector containing two values for `variable`.
#' @param newdata Optional data frame which is used for marginalization.
#' @param nsim Non-negative integer defining the number of samples from the
#' normal approximation of the model parameters used in
#' computing the approximate quantiles of the estimates. If `0`, only point
#' estimates are returned.
#' @param probs Vector defining the quantiles of interest. Default is
#' `c(0.025, 0.975)`.
#' @export
average_marginal_effect <- function(
model, variable, values, newdata = NULL,
nsim = 0,
probs = c(0.025, 0.975)) {
stopifnot_(
checkmate::test_count(nsim),
"Argument {.arg nsim} should be a single non-negative integer."
)
stopifnot_(
checkmate::test_string(x = variable),
"Argument {.arg variable} must be a single character string."
)
stopifnot_(
length(values) != 2,
"Argument {.arg values} should contain two values for
variable {.var variable}.")
if (is.null(newdata)) {
time <- model$time_variable
id <- model$id_variable
stopifnot_(
is.data.frame(newdata),
"Argument {.arg newdata} must be a {.cls data.frame} object."
)
stopifnot_(
!is.null(newdata[[id]]),
"Can't find grouping variable {.var {id}} in {.arg newdata}."
)
stopifnot_(
!is.null(newdata[[time]]),
"Can't find time index variable {.var {time}} in {.arg newdata}."
)
stopifnot_(
!is.null(newdata[[variable]]),
"Can't find time variable {.var {variable}} in {.arg newdata}."
)
} else {
stopifnot(
!is.null(object$data),
"Model does not contain original data and argument {.arg newdata} is
{.var NULL}."
)
newdata <- model$data
}

beta_i_raw <- stan_to_cpp_initial(
model$estimation_results$parameters$beta_i_raw
)
beta_s_raw <- stan_to_cpp_transition(
model$estimation_results$parameters$beta_s_raw
)
beta_o_raw <- stan_to_cpp_emission(
model$estimation_results$parameters$beta_o_raw,
1,
C > 1
)
newdata[[variable]] <- values[1]
model <- update(model, newdata = newdata)
X_initial1 <- t(model$X_initial)
X_transition1 <- aperm(model$X_transition, c(3, 1, 2))
X_emission1 <- aperm(model$X_emission, c(3, 1, 2))
newdata[[variable]] <- values[2]
model <- update(model, newdata = newdata)
X_initial2 <- t(model$X_initial)
X_transition2 <- aperm(model$X_transition, c(3, 1, 2))
X_emission2 <- aperm(model$X_emission, c(3, 1, 2))

ame_pi <- get_pi(beta_i_raw, X_initial1, 0) -
get_pi(beta_i_raw, X_initial2, 0)
ame_A <- get_A(beta_s_raw, X_transition1, 0) -
get_A(beta_s_raw, X_transition2, 0)
ame_B <- if (model$n_channels == 1) {
get_B(beta_o_raw, X_emission1, 0) - get_B(beta_o_raw, X_emission2, 0)
} else {
get_multichannel_B(beta_o_raw, X_emission1, S, C, M, 0) -
get_multichannel_B(beta_o_raw, X_emission2, S, C, M, 0)
}
browser()
if (nsim > 0) {
stopifnot_(
checkmate::test_numeric(
x = probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 1L
),
"Argument {.arg probs} must be a {.cls numeric} vector with values
between 0 and 1."
)
chol_precision <- chol(-model$estimation$hessian)
U <- backsolve(chol_precision, diag(ncol(chol_precision)))
x <- matrix(rnorm(nsim * ncol(U)), nrow = nsim) %*% U
x <- t(sweep(x, 2, c(beta_i_raw, beta_s_raw, beta_o_raw), "+"))
p_i <- length(beta_i_raw)
p_s <- length(beta_s_raw)
p_o <- length(beta_o_raw)

pi_samples <- apply(
x[seq_len(p_i), ], 2, function(z) {
z <- array(z, dim = dim(beta_i_raw))
z <- stan_to_cpp_initial(z)
get_pi(z, X_initial1) - get_pi(z, X_initial2)
}
)
A_samples <- apply(
x[p_i + seq_len(p_s), ], 2, function(z) {
z <- array(z, dim = dim(beta_s_raw))
z <- stan_to_cpp_transition(z)
unlist(get_A(z, X_transition1)) -
unlist(get_A(z, X_transition2))
}
)
B_samples <- apply(
x[p_i + p_s + seq_len(p_o), ], 2, function(z) {
z <- array(z, dim = dim(beta_o_raw))
z <- stan_to_cpp_emission(z, 1, model$n_channels > 1)
unlist(get_B(aperm(z, c(2, 3, 1)), X_emission1)) -
unlist(get_B(aperm(z, c(2, 3, 1)), X_emission2))
}
)

quantiles <- fast_quantiles(samples, probs)
for(i in seq_along(probs)) {
transition_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
}
}
class(out) <- "ame"
}
3 changes: 2 additions & 1 deletion R/build_hmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ build_hmm <- function(observations, n_states, transition_probs, emission_probs,
length_of_sequences = attr(observations, "length_of_sequences"),
n_sequences = attr(observations, "n_sequences"),
n_symbols = n_symbols, n_states = n_states,
n_channels = n_channels
n_channels = n_channels,
call = match.call()
),
class = "hmm",
nobs = attr(observations, "nobs"),
Expand Down
1 change: 1 addition & 0 deletions R/build_lcm.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ build_lcm <- function(observations, n_clusters, emission_probs,
formula = formula, data = data, coefficients = coefficients,
cluster_names = cluster_names, state_names = cluster_names,
channel_names = channel_names)
model$call <- match.call()
attr(model, "type") <- "lcm"
model
}
46 changes: 29 additions & 17 deletions R/build_mhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,15 +275,15 @@ build_mhmm <- function(observations,
symbol_names <- attr(observations, "symbol_names")
n_sequences <- attr(observations, "n_sequences")

inits_given <- !missing(transition_probs) || !missing(initial_probs) || !missing(emission_probs)
inits_given <- !missing(transition_probs) && !missing(initial_probs) && !missing(emission_probs)
n_states_given <- !missing(n_states)
stopifnot_(
inits_given || n_states_given,
"Provide either {.arg n_states} or all three of {.arg initial_probs}, ",
"{.arg transition_probs}, and {.arg emission_probs}."
"Provide either {.arg n_states} or all three of {.arg initial_probs},
{.arg transition_probs}, and {.arg emission_probs}."
)

# if any initial values are given, ignore n_states and use these
# if all initial values are given, ignore n_states and use these
if (inits_given) {
stopifnot_(
is.list(transition_probs),
Expand Down Expand Up @@ -321,8 +321,8 @@ build_mhmm <- function(observations,
"The length of {.arg cluster_names} does not match the number of
clusters. Names were not used."
)
cluster_names <- paste("Cluster", seq_len(n_clusters))
}
cluster_names <- paste("Cluster", seq_len(n_clusters))
}
n_states <- integer(n_clusters)
if (is.null(state_names)) {
Expand Down Expand Up @@ -367,6 +367,9 @@ build_mhmm <- function(observations,
emission_probs <- simulate_emission_probs(
n_states = n_states, n_symbols = n_symbols, n_clusters = n_clusters
)
if (is.null(state_names)) {
state_names <- vector("list", n_clusters)
}
for (i in seq_len(n_clusters)) {
transition_probs[[i]] <- .check_transition_probs(
transition_probs[[i]],
Expand All @@ -376,7 +379,7 @@ build_mhmm <- function(observations,
initial_probs[[i]] <- .check_initial_probs(
initial_probs[[i]], n_states[i], state_names[[i]]
)
emission_probs <- .check_emission_probs(
emission_probs[[i]] <- .check_emission_probs(
emission_probs[[i]], n_states[i], n_channels, n_symbols,
state_names[[i]], symbol_names,channel_names
)
Expand All @@ -390,20 +393,24 @@ build_mhmm <- function(observations,
} else {
stopifnot_(
inherits(formula, "formula"),
"Argument {.arg formula} must be a {.cls formula} object.")
"{.arg formula} must be a {.cls formula} object.")
stopifnot_(
is.data.frame(data),
"If {.arg formula} is provided, {.arg data} must be a {.cls data.frame}
object."
paste0(
"If {.arg formula} is provided, {.arg data} must be a ",
"{.cls data.frame} object."
)
)
X <- model.matrix(formula, data)
if (nrow(X) != n_sequences) {
if (length(all.vars(formula)) > 0 &&
sum(!complete.cases(data[all.vars(formula)])) > 0) {
stop_(
"Missing cases are not allowed in covariates. Use e.g. the
{.fn complete.cases} to detect them, then fix, impute, or
remove."
paste0(
"Missing cases are not allowed in covariates. Use e.g. the ",
"{.fn complete.cases} to detect them, then fix, impute, or ",
"remove."
)
)
} else {
stop_(
Expand All @@ -427,24 +434,29 @@ build_mhmm <- function(observations,
}
rownames(coefficients) <- colnames(X)
colnames(coefficients) <- cluster_names
names(transition_probs) <- names(emission_probs) <- names(initial_probs) <- cluster_names
names(transition_probs) <- names(emission_probs) <-
names(initial_probs) <- cluster_names

model <- structure(
list(
observations = observations, transition_probs = transition_probs,
emission_probs = emission_probs, initial_probs = initial_probs,
coefficients = coefficients, X = X, cluster_names = cluster_names, state_names = state_names,
coefficients = coefficients, X = X, cluster_names = cluster_names,
state_names = state_names,
symbol_names = symbol_names, channel_names = channel_names,
length_of_sequences = attr(observations, "length_of_sequences"),
n_sequences = n_sequences, n_clusters = n_clusters,
n_symbols = n_symbols, n_states = n_states,
n_channels = n_channels,
n_covariates = n_covariates, formula = formula
n_covariates = n_covariates, formula = formula,
call = match.call()
),
class = "mhmm",
nobs = attr(observations, "nobs"),
df = sum(unlist(initial_probs) > 0) - n_clusters + sum(unlist(transition_probs) > 0) - sum(n_states) +
sum(unlist(emission_probs) > 0) - sum(n_states) * n_channels + n_covariates * (n_clusters - 1),
df = sum(unlist(initial_probs) > 0) - n_clusters +
sum(unlist(transition_probs) > 0) - sum(n_states) +
sum(unlist(emission_probs) > 0) -
sum(n_states) * n_channels + n_covariates * (n_clusters - 1),
type = "mhmm"
)
model
Expand Down
Loading

0 comments on commit 5c7a386

Please sign in to comment.