Skip to content

Commit

Permalink
sequence lengths, faster removal of voids
Browse files Browse the repository at this point in the history
  • Loading branch information
helske committed Sep 24, 2024
1 parent a5023a9 commit 3f3a341
Show file tree
Hide file tree
Showing 21 changed files with 515 additions and 313 deletions.
40 changes: 28 additions & 12 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,36 @@ forwardbackwardx <- function(transition, emission, init, obs, coef, X, numberOfS
.Call(`_seqHMM_forwardbackwardx`, transition, emission, init, obs, coef, X, numberOfStates, forwardonly, threads)
}

get_omega <- function(gamma_omega_raw, X, logspace) {
.Call(`_seqHMM_get_omega`, gamma_omega_raw, X, logspace)
get_omega <- function(gamma_raw, X, logspace) {
.Call(`_seqHMM_get_omega`, gamma_raw, X, logspace)
}

get_omega_all <- function(gamma_raw, X, logspace) {
.Call(`_seqHMM_get_omega_all`, gamma_raw, X, logspace)
}

get_pi <- function(gamma_raw, X, logspace) {
.Call(`_seqHMM_get_pi`, gamma_raw, X, logspace)
}

get_pi_all <- function(gamma_raw, X, logspace) {
.Call(`_seqHMM_get_pi_all`, gamma_raw, X, logspace)
}

get_A <- function(gamma_raw, X, logspace, tv) {
.Call(`_seqHMM_get_A`, gamma_raw, X, logspace, tv)
}

get_B <- function(gamma_raw, X, M, logspace, add_missing, tv) {
.Call(`_seqHMM_get_B`, gamma_raw, X, M, logspace, add_missing, tv)
get_A_all <- function(gamma_raw, X, logspace, tv) {
.Call(`_seqHMM_get_A_all`, gamma_raw, X, logspace, tv)
}

get_B <- function(gamma_raw, X, logspace, add_missing, tv) {
.Call(`_seqHMM_get_B`, gamma_raw, X, logspace, add_missing, tv)
}

get_B_all <- function(gamma_raw, X, logspace, add_missing, tv) {
.Call(`_seqHMM_get_B_all`, gamma_raw, X, logspace, add_missing, tv)
}

logLikHMM <- function(transition, emission, init, obs, threads) {
Expand Down Expand Up @@ -109,20 +125,20 @@ log_objective <- function(transition, emission, init, obs, ANZ, BNZ, INZ, nSymbo
.Call(`_seqHMM_log_objective`, transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, threads)
}

log_objective_nhmm_singlechannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, iv_pi, iv_A, iv_B, tv_A, tv_B) {
.Call(`_seqHMM_log_objective_nhmm_singlechannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, iv_pi, iv_A, iv_B, tv_A, tv_B)
log_objective_nhmm_singlechannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, Ti) {
.Call(`_seqHMM_log_objective_nhmm_singlechannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, Ti)
}

log_objective_nhmm_multichannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B) {
.Call(`_seqHMM_log_objective_nhmm_multichannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B)
log_objective_nhmm_multichannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, Ti) {
.Call(`_seqHMM_log_objective_nhmm_multichannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, Ti)
}

log_objective_mnhmm_singlechannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega) {
.Call(`_seqHMM_log_objective_mnhmm_singlechannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega)
log_objective_mnhmm_singlechannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, Ti) {
.Call(`_seqHMM_log_objective_mnhmm_singlechannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, Ti)
}

log_objective_mnhmm_multichannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega) {
.Call(`_seqHMM_log_objective_mnhmm_multichannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega)
log_objective_mnhmm_multichannel <- function(gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, Ti) {
.Call(`_seqHMM_log_objective_mnhmm_multichannel`, gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, Ti)
}

log_objectivex <- function(transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, coef, X, numberOfStates, threads) {
Expand Down
4 changes: 3 additions & 1 deletion R/build_mnhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ build_mnhmm <- function(
np_pi = n_clusters * out$extras$np_pi,
np_A = n_clusters * out$extras$np_A,
np_B = n_clusters * out$extras$np_B,
np_omega = out$extras$np_omega
np_omega = out$extras$np_omega,
missing_X_transition = out$extras$missing_X_transition,
missing_X_emission = out$extras$missing_X_emission
)
}
4 changes: 3 additions & 1 deletion R/build_nhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ build_nhmm <- function(
tv_B = out$extras$tv_B,
np_pi = out$extras$np_pi,
np_A = out$extras$np_A,
np_B = out$extras$np_B
np_B = out$extras$np_B,
missing_X_transition = out$extras$missing_X_transition,
missing_X_emission = out$extras$missing_X_emission
)
}
6 changes: 3 additions & 3 deletions R/check_build_arguments.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Create observations for the model objects
#'
#' Note that for historical reasons `length_of_sequences` refers to the maximum
#' length of sequences, whereas `sequence_lengths` refers to the actual non-void
#' lengths of each sequence.
#' Note that for backward compatibility reasons `length_of_sequences` refers
#' to the maximum length of sequences, whereas `sequence_lengths` refers to
#' the actual non-void lengths of each sequence.
#'@noRd
#'
.check_observations <- function(x, channel_names = NULL,
Expand Down
2 changes: 2 additions & 0 deletions R/create_base_nhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ create_base_nhmm <- function(observations, data, time, id, n_states,
coef_names_cluster = if(mixture) omega$coef_names else NULL
),
extras = list(
missing_X_transition <- A$missing,
missing_X_emission <- B$missing,
np_pi = pi$n_pars,
np_A = A$n_pars,
np_B = B$n_pars,
Expand Down
7 changes: 5 additions & 2 deletions R/fit_mnhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
K_s <- nrow(X_s)
K_o <- nrow(X_o)
K_d <- nrow(X_d)
Ti <- model$sequence_lengths

dots <- list(...)
if (is.null(dots$algorithm)) dots$algorithm <- "NLOPT_LD_LBFGS"
Expand All @@ -65,7 +66,8 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
)
out <- log_objective_mnhmm_singlechannel(
gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o,
gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega
gamma_omega_raw, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega,
Ti
)
list(objective = - out$loglik,
gradient = - unlist(out[-1]))
Expand Down Expand Up @@ -107,7 +109,8 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
)
out <- log_objective_mnhmm_multichannel(
gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o,
gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega
gamma_omega_raw, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega,
Ti
)
list(objective = - out$loglik,
gradient = - unlist(out[-1]))
Expand Down
5 changes: 3 additions & 2 deletions R/fit_nhmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
K_i <- nrow(X_i)
K_s <- nrow(X_s)
K_o <- nrow(X_o)
Ti <- model$sequence_lengths

dots <- list(...)
if (is.null(dots$algorithm)) dots$algorithm <- "NLOPT_LD_LBFGS"
Expand All @@ -58,7 +59,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
)
out <- log_objective_nhmm_singlechannel(
gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs,
iv_pi, iv_A, iv_B, tv_A, tv_B
iv_pi, iv_A, iv_B, tv_A, tv_B, Ti
)
list(objective = - out$loglik,
gradient = - unlist(out[-1]))
Expand Down Expand Up @@ -87,7 +88,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, hessian, ...) {
)
out <- log_objective_nhmm_multichannel(
gamma_pi_raw, X_i, gamma_A_raw, X_s, gamma_B_raw, X_o, obs, M,
iv_pi, iv_A, iv_B, tv_A, tv_B
iv_pi, iv_A, iv_B, tv_A, tv_B, Ti
)
list(objective = - out$loglik,
gradient = - unlist(out[-1]))
Expand Down
110 changes: 52 additions & 58 deletions R/get_probs.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@ get_initial_probs.nhmm <- function(model, ...) {
d <- data.frame(
id = rep(ids, each = model$n_states),
state = model$state_names,
estimate = c(apply(
model$X_initial, 2, function(z) {
get_pi(model$coefficients$gamma_pi_raw, z, FALSE)
}
))
estimate = c(
get_pi_all(model$coefficients$gamma_pi_raw, model$X_initial, FALSE)
)
)
}
stats::setNames(d, c(model$id_variable, "state", "estimate"))
Expand All @@ -66,8 +64,9 @@ get_initial_probs.mnhmm <- function(model, ...) {
cluster = model$cluster_names[i],
id = rep(ids, each = model$n_states),
state = model$state_names[[i]],
estimate = get_pi(
model$coefficients$gamma_pi_raw[[i]], model$X_initial[, 1L], FALSE
estimate = c(
get_pi(model$coefficients$gamma_pi_raw[[i]], model$X_initial[, 1L],
FALSE)
)
)
})
Expand All @@ -80,11 +79,11 @@ get_initial_probs.mnhmm <- function(model, ...) {
cluster = model$cluster_names[i],
id = rep(ids, each = model$n_states),
state = model$state_names[[i]],
estimate = c(apply(
model$X_initial, 2, function(z) {
get_pi(model$coefficients$gamma_pi_raw[[i]], z, FALSE)
}
))
estimate = c(
get_pi_all(
model$coefficients$gamma_pi_raw[[i]], model$X_initial, FALSE
)
)
)
})
)
Expand All @@ -109,6 +108,7 @@ get_initial_probs.mhmm <- function(model, ...) {
get_transition_probs.nhmm <- function(model, ...) {
S <- model$n_states
T_ <- model$length_of_sequences
model$X_transition[attr(model, "missing_X_transition")] <- NA
if (model$n_channels == 1L) {
ids <- rownames(model$observations)
times <- colnames(model$observations)
Expand All @@ -133,14 +133,12 @@ get_transition_probs.nhmm <- function(model, ...) {
time = rep(times, each = S^2),
state_from = model$state_names,
state_to = rep(model$state_names, each = S),
estimate = c(apply(
model$X_transition, 3, function(z) {
get_A(
model$coefficients$gamma_A_raw, matrix(z, ncol = T_), FALSE,
attr(model, "tv_A")
)
}
))
estimate = c(
get_A_all(
model$coefficients$gamma_A_raw, model$X_transition, FALSE,
attr(model, "tv_A")
)
)
)
}
stats::setNames(
Expand All @@ -157,6 +155,7 @@ get_transition_probs.mnhmm <- function(model, ...) {
S <- model$n_states
T_ <- model$length_of_sequences
D <- model$n_clusters
model$X_transition[attr(model, "missing_X_transition")] <- NA
if (model$n_channels == 1L) {
ids <- rownames(model$observations)
times <- colnames(model$observations)
Expand Down Expand Up @@ -191,14 +190,12 @@ get_transition_probs.mnhmm <- function(model, ...) {
time = rep(times, each = S^2),
state_from = model$state_names[[i]],
state_to = rep(model$state_names[[i]], each = S),
estimate = c(apply(
model$X_transition, 3, function(z) {
get_A(
model$coefficients$gamma_A_raw[[i]], matrix(z, ncol = T_), FALSE,
attr(model, "tv_A")
)
}
))
estimate = c(
get_A_all(
model$coefficients$gamma_A_raw[[i]], model$X_transition, FALSE,
attr(model, "tv_A")
)
)
)
})
)
Expand Down Expand Up @@ -229,6 +226,7 @@ get_emission_probs.nhmm <- function(model, ...) {
C <- model$n_channels
T_ <- model$length_of_sequences
M <- model$n_symbols
model$X_emission[attr(model, "missing_X_emission")] <- NA
if (C == 1L) {
ids <- rownames(model$observations)
times <- colnames(model$observations)
Expand All @@ -250,8 +248,8 @@ get_emission_probs.nhmm <- function(model, ...) {
state = model$state_names,
channel = model$channel_names[i],
observation = rep(symbol_names[[i]], each = S),
estimate = unlist(get_B(
model$coefficients$gamma_B_raw[i], X, M[i], FALSE, FALSE,
estimate = c(get_B(
model$coefficients$gamma_B_raw[[i]], X, FALSE, FALSE,
attr(model, "tv_B"))
)
)
Expand All @@ -267,13 +265,11 @@ get_emission_probs.nhmm <- function(model, ...) {
state = model$state_names,
channel = model$channel_names[i],
observation = rep(symbol_names[[i]], each = S),
estimate = apply(
model$X_emission, 3, function(z) {
unlist(get_B(
model$coefficients$gamma_B_raw[i], matrix(z, ncol = T_), M[i],
FALSE, FALSE, attr(model, "tv_B")
))
}
estimate = c(
get_B_all(
model$coefficients$gamma_B_raw[[i]], model$X_emission,
FALSE, FALSE, attr(model, "tv_B")
)
)
)
})
Expand All @@ -293,14 +289,14 @@ get_emission_probs.mnhmm <- function(model, ...) {
D <- model$n_clusters
T_ <- model$length_of_sequences
M <- model$n_symbols
model$X_emission[attr(model, "missing_X_emission")] <- NA
if (C == 1L) {
ids <- rownames(model$observations)
times <- colnames(model$observations)
symbol_names <- list(model$symbol_names)
for (i in seq_len(D)) {
model$coefficients$gamma_B_raw[[i]] <-
list(model$coefficients$gamma_B_raw[[i]])
}
model$coefficients$gamma_B_raw <- lapply(
model$coefficients$gamma_B_raw, list
)
} else {
ids <- rownames(model$observations[[1]])
times <- colnames(model$observations[[1]])
Expand All @@ -322,7 +318,7 @@ get_emission_probs.mnhmm <- function(model, ...) {
channel = model$channel_names[i],
observation = rep(symbol_names[[i]], each = S),
estimate = unlist(get_B(
model$coefficients$gamma_B_raw[[j]][i], X, M[i], FALSE, FALSE,
model$coefficients$gamma_B_raw[[j]][[i]], X, FALSE, FALSE,
attr(model, "tv_B"))
)
)
Expand All @@ -344,13 +340,11 @@ get_emission_probs.mnhmm <- function(model, ...) {
state = model$state_names[[j]],
channel = model$channel_names[i],
observation = rep(symbol_names[[i]], each = S),
estimate = apply(
model$X_emission, 3, function(z) {
unlist(get_B(
model$coefficients$gamma_B_raw[[j]][i],
matrix(z, ncol = T_), M[i], FALSE, FALSE, attr(model, "tv_B")
))
}
estimate = c(
get_B_all(
model$coefficients$gamma_B_raw[[j]][[i]], model$X_emission,
FALSE, FALSE,attr(model, "tv_B")
)
)
)
})
Expand Down Expand Up @@ -399,11 +393,11 @@ get_cluster_probs.mnhmm <- function(model, ...) {
d <- data.frame(
cluster = model$cluster_names,
id = rep(ids, each = model$n_clusters),
estimate = c(apply(
model$X_cluster, 2, function(z) {
get_omega(model$coefficients$gamma_omega_raw, z, FALSE)
}
))
estimate = c(
get_omega_all(
model$coefficients$gamma_omega_raw, model$X_cluster, FALSE
)
)
)
}
stats::setNames(d, c("cluster", model$id_variable, "estimate"))
Expand Down Expand Up @@ -478,8 +472,8 @@ get_probs.nhmm <- function(model, newdata = NULL, remove_voids = TRUE, ...) {
if (remove_voids) {
list(
initial_probs = out$initial_probs,
transition_probs = remove_voids(model, out$transition_probs),
emission_probs = remove_voids(model, out$emission_probs)
transition_probs = out$transition_probs[complete.cases(out$transition_probs), ],
emission_probs = out$emission_probs[complete.cases(out$emission_probs), ]
)
} else out
}
Expand Down Expand Up @@ -526,8 +520,8 @@ get_probs.mnhmm <- function(model, newdata = NULL, remove_voids = TRUE, ...) {
if (remove_voids) {
list(
initial_probs = out$initial_probs,
transition_probs = remove_voids(model, out$transition_probs),
emission_probs = remove_voids(model, out$emission_probs),
transition_probs = out$transition_probs[complete.cases(out$transition_probs), ],
emission_probs = out$emission_probs[complete.cases(out$emission_probs), ],
cluster_probs = out$cluster_probs
)
} else out
Expand Down
Loading

0 comments on commit 3f3a341

Please sign in to comment.