From 37e00a331f9fdaab1178dd965b3f55092252b727 Mon Sep 17 00:00:00 2001 From: Jouni Helske Date: Wed, 25 Sep 2024 21:24:07 +0300 Subject: [PATCH] alpha --- R/simulate_hmm.R | 1 - R/simulate_pars.R | 162 ++++++++++++++++++++++++---------------------- 2 files changed, 85 insertions(+), 78 deletions(-) diff --git a/R/simulate_hmm.R b/R/simulate_hmm.R index dc9fcd80..60a7f047 100644 --- a/R/simulate_hmm.R +++ b/R/simulate_hmm.R @@ -7,7 +7,6 @@ #' @param transition_probs A matrix of transition probabilities. #' @param emission_probs A matrix of emission probabilities or a list of such objects (one for each channel). #' @param sequence_length Length for simulated sequences. -#' #' @return A list of state sequence objects of class `stslist`. #' @seealso [build_hmm()] and [fit_model()] for building #' and fitting hidden Markov models; [stacked_sequence_plot()] for plotting diff --git a/R/simulate_pars.R b/R/simulate_pars.R index 7312a610..b038a5ee 100644 --- a/R/simulate_pars.R +++ b/R/simulate_pars.R @@ -1,77 +1,85 @@ -#' Simulate Parameters of Hidden Markov Models -#' -#' These are helper functions for quick construction of initial values for various -#' model building functions. -#' Mostly useful for global optimization algorithms which do not depend on initial values. -#' -#' -#' @export -#' @param n_states Number of states in each cluster. -#' @param n_clusters Number of clusters. -#' @param left_right Constrain the transition probabilities to upper triangular. -#' Default is `FALSE`. -#' @param diag_c A constant value to be added to diagonal of transition matrices before scaling. -#' @param n_symbols Number of distinct symbols in each channel. -#' @rdname simulate_pars -#' @seealso [build_hmm()], [build_mhmm()], -#' [build_mm()], [build_mmm()], and [build_lcm()] -#' for constructing different types of models. -simulate_initial_probs <- function(n_states, n_clusters = 1) { - n_states <- rep(n_states, length = n_clusters) - - if (n_clusters == 1) { - x <- runif(n_states) - x / sum(x) - } else { - probs <- vector("list", n_clusters) - for (i in 1:n_clusters) { - x <- runif(n_states[i]) - probs[[i]] <- x / sum(x) - } - probs - } -} -#' @export -#' @rdname simulate_pars -simulate_transition_probs <- function(n_states, n_clusters = 1, left_right = FALSE, diag_c = 0) { - n_states <- rep(n_states, length = n_clusters) - if (n_clusters == 1) { - x <- matrix(runif(n_states^2), n_states, n_states) + diag(diag_c, n_states) - if (left_right) x[lower.tri(x)] <- 0 - probs <- x / rowSums(x) - } else { - probs <- vector("list", n_clusters) - for (i in 1:n_clusters) { - x <- matrix(runif(n_states[i]^2), n_states[i], n_states[i]) + diag(diag_c, n_states[i]) - if (left_right) x[lower.tri(x)] <- 0 - probs[[i]] <- x / rowSums(x) - } - } - probs -} -#' @export -#' @rdname simulate_pars -simulate_emission_probs <- function(n_states, n_symbols, n_clusters = 1) { - n_channels <- length(n_symbols) - emiss <- vector("list", n_clusters) - n_states <- rep(n_states, length = n_clusters) - if (n_channels > 1) { - for (i in 1:n_clusters) { - emiss[[i]] <- vector("list", n_channels) - for (j in seq_len(n_channels)) { - emiss[[i]][[j]] <- matrix(runif(n_states[i] * n_symbols[j]), n_states[i], n_symbols[j]) - emiss[[i]][[j]] <- emiss[[i]][[j]] / rowSums(emiss[[i]][[j]]) - } - } - } else { - for (i in 1:n_clusters) { - emiss[[i]] <- matrix(runif(n_states[i] * n_symbols), n_states[i], n_symbols) - emiss[[i]] <- emiss[[i]] / rowSums(emiss[[i]]) - } - } - if (n_clusters == 1) { - emiss[[1]] - } else { - emiss - } -} +#' Simulate Parameters of Hidden Markov Models +#' +#' These are helper functions for quick construction of initial values for various +#' model building functions. +#' Mostly useful for global optimization algorithms which do not depend on initial values. +#' +#' +#' @export +#' @param n_states Number of states in each cluster. +#' @param n_clusters Number of clusters. +#' @param left_right Constrain the transition probabilities to upper triangular. +#' Default is `FALSE`. +#' @param diag_c A constant value to be added to diagonal of transition matrices before scaling. +#' @param n_symbols Number of distinct symbols in each channel. +#' @param alpha A scalar, or a vector of length S (number of states) or M +#' (number of symbols) defining the parameters of the Dirichlet distribution +#' used to simulate the probabilities. +#' @rdname simulate_pars +simulate_initial_probs <- function(n_states, n_clusters = 1, alpha = 1) { + n_states <- rep(n_states, length = n_clusters) + + if (n_clusters == 1) { + x <- rgamma(n_states, alpha) + x / sum(x) + } else { + probs <- vector("list", n_clusters) + for (i in 1:n_clusters) { + x <- rgamma(n_states[i], alpha) + probs[[i]] <- x / sum(x) + } + probs + } +} +#' @export +#' @rdname simulate_pars +simulate_transition_probs <- function(n_states, n_clusters = 1, left_right = FALSE, diag_c = 0, alpha = 1) { + n_states <- rep(n_states, length = n_clusters) + if (n_clusters == 1) { + x <- matrix(rgamma(n_states^2, alpha), n_states, n_states, TRUE) + + diag(diag_c, n_states) + if (left_right) x[lower.tri(x)] <- 0 + probs <- x / rowSums(x) + } else { + probs <- vector("list", n_clusters) + for (i in seq_len(n_clusters)) { + x <- matrix( + rgamma(n_states[i]^2, alpha), n_states[i], n_states[i], TRUE + ) + diag(diag_c, n_states[i]) + if (left_right) x[lower.tri(x)] <- 0 + probs[[i]] <- x / rowSums(x) + } + } + probs +} +#' @export +#' @rdname simulate_pars +simulate_emission_probs <- function(n_states, n_symbols, n_clusters = 1, + alpha = 1) { + n_channels <- length(n_symbols) + emiss <- vector("list", n_clusters) + n_states <- rep(n_states, length = n_clusters) + if (n_channels > 1) { + for (i in seq_len(n_clusters)) { + emiss[[i]] <- vector("list", n_channels) + for (j in seq_len(n_channels)) { + emiss[[i]][[j]] <- matrix( + rgamma(n_states[i] * n_symbols[j], alpha), n_states[i], n_symbols[j], + TRUE) + emiss[[i]][[j]] <- emiss[[i]][[j]] / rowSums(emiss[[i]][[j]]) + } + } + } else { + for (i in seq_len(n_clusters)) { + emiss[[i]] <- matrix( + rgamma(n_states[i] * n_symbols), n_states[i], n_symbols, TRUE + ) + emiss[[i]] <- emiss[[i]] / rowSums(emiss[[i]]) + } + } + if (n_clusters == 1) { + emiss[[1]] + } else { + emiss + } +}