From 37e00a331f9fdaab1178dd965b3f55092252b727 Mon Sep 17 00:00:00 2001
From: Jouni Helske <jouni.helske@utu.fi>
Date: Wed, 25 Sep 2024 21:24:07 +0300
Subject: [PATCH] alpha

---
 R/simulate_hmm.R  |   1 -
 R/simulate_pars.R | 162 ++++++++++++++++++++++++----------------------
 2 files changed, 85 insertions(+), 78 deletions(-)

diff --git a/R/simulate_hmm.R b/R/simulate_hmm.R
index dc9fcd80..60a7f047 100644
--- a/R/simulate_hmm.R
+++ b/R/simulate_hmm.R
@@ -7,7 +7,6 @@
 #' @param transition_probs A matrix of transition probabilities.
 #' @param emission_probs A matrix of emission probabilities or a list of such objects (one for each channel).
 #' @param sequence_length Length for simulated sequences.
-#'
 #' @return A list of state sequence objects of class `stslist`.
 #' @seealso [build_hmm()] and [fit_model()] for building
 #' and fitting hidden Markov models; [stacked_sequence_plot()] for plotting
diff --git a/R/simulate_pars.R b/R/simulate_pars.R
index 7312a610..b038a5ee 100644
--- a/R/simulate_pars.R
+++ b/R/simulate_pars.R
@@ -1,77 +1,85 @@
-#' Simulate Parameters of Hidden Markov Models
-#'
-#' These are helper functions for quick construction of initial values for various
-#' model building functions.
-#' Mostly useful for global optimization algorithms which do not depend on initial values.
-#'
-#'
-#' @export
-#' @param n_states Number of states in each cluster.
-#' @param n_clusters Number of clusters.
-#' @param left_right Constrain the transition probabilities to upper triangular.
-#' Default is `FALSE`.
-#' @param diag_c A constant value to be added to diagonal of transition matrices before scaling.
-#' @param n_symbols Number of distinct symbols in each channel.
-#' @rdname simulate_pars
-#' @seealso [build_hmm()], [build_mhmm()],
-#'   [build_mm()], [build_mmm()], and  [build_lcm()]
-#'   for constructing different types of models.
-simulate_initial_probs <- function(n_states, n_clusters = 1) {
-  n_states <- rep(n_states, length = n_clusters)
-
-  if (n_clusters == 1) {
-    x <- runif(n_states)
-    x / sum(x)
-  } else {
-    probs <- vector("list", n_clusters)
-    for (i in 1:n_clusters) {
-      x <- runif(n_states[i])
-      probs[[i]] <- x / sum(x)
-    }
-    probs
-  }
-}
-#' @export
-#' @rdname simulate_pars
-simulate_transition_probs <- function(n_states, n_clusters = 1, left_right = FALSE, diag_c = 0) {
-  n_states <- rep(n_states, length = n_clusters)
-  if (n_clusters == 1) {
-    x <- matrix(runif(n_states^2), n_states, n_states) + diag(diag_c, n_states)
-    if (left_right) x[lower.tri(x)] <- 0
-    probs <- x / rowSums(x)
-  } else {
-    probs <- vector("list", n_clusters)
-    for (i in 1:n_clusters) {
-      x <- matrix(runif(n_states[i]^2), n_states[i], n_states[i]) + diag(diag_c, n_states[i])
-      if (left_right) x[lower.tri(x)] <- 0
-      probs[[i]] <- x / rowSums(x)
-    }
-  }
-  probs
-}
-#' @export
-#' @rdname simulate_pars
-simulate_emission_probs <- function(n_states, n_symbols, n_clusters = 1) {
-  n_channels <- length(n_symbols)
-  emiss <- vector("list", n_clusters)
-  n_states <- rep(n_states, length = n_clusters)
-  if (n_channels > 1) {
-    for (i in 1:n_clusters) {
-      emiss[[i]] <- vector("list", n_channels)
-      for (j in seq_len(n_channels)) {
-        emiss[[i]][[j]] <- matrix(runif(n_states[i] * n_symbols[j]), n_states[i], n_symbols[j])
-        emiss[[i]][[j]] <- emiss[[i]][[j]] / rowSums(emiss[[i]][[j]])
-      }
-    }
-  } else {
-    for (i in 1:n_clusters) {
-      emiss[[i]] <- matrix(runif(n_states[i] * n_symbols), n_states[i], n_symbols)
-      emiss[[i]] <- emiss[[i]] / rowSums(emiss[[i]])
-    }
-  }
-  if (n_clusters == 1) {
-    emiss[[1]]
-  } else {
-    emiss
-  }
-}
+#' Simulate Parameters of Hidden Markov Models
+#'
+#' These are helper functions for quick construction of initial values for various
+#' model building functions.
+#' Mostly useful for global optimization algorithms which do not depend on initial values.
+#'
+#'
+#' @export
+#' @param n_states Number of states in each cluster.
+#' @param n_clusters Number of clusters.
+#' @param left_right Constrain the transition probabilities to upper triangular.
+#' Default is `FALSE`.
+#' @param diag_c A constant value to be added to diagonal of transition matrices before scaling.
+#' @param n_symbols Number of distinct symbols in each channel.
+#' @param alpha A scalar, or a vector of length S (number of states) or M 
+#' (number of symbols) defining the parameters of the Dirichlet distribution 
+#' used to simulate the probabilities.
+#' @rdname simulate_pars
+simulate_initial_probs <- function(n_states, n_clusters = 1, alpha = 1) {
+  n_states <- rep(n_states, length = n_clusters)
+  
+  if (n_clusters == 1) {
+    x <- rgamma(n_states, alpha)
+    x / sum(x)
+  } else {
+    probs <- vector("list", n_clusters)
+    for (i in 1:n_clusters) {
+      x <- rgamma(n_states[i], alpha)
+      probs[[i]] <- x / sum(x)
+    }
+    probs
+  }
+}
+#' @export
+#' @rdname simulate_pars
+simulate_transition_probs <- function(n_states, n_clusters = 1, left_right = FALSE, diag_c = 0, alpha = 1) {
+  n_states <- rep(n_states, length = n_clusters)
+  if (n_clusters == 1) {
+    x <- matrix(rgamma(n_states^2, alpha), n_states, n_states, TRUE) + 
+      diag(diag_c, n_states)
+    if (left_right) x[lower.tri(x)] <- 0
+    probs <- x / rowSums(x)
+  } else {
+    probs <- vector("list", n_clusters)
+    for (i in seq_len(n_clusters)) {
+      x <- matrix(
+        rgamma(n_states[i]^2, alpha), n_states[i], n_states[i], TRUE
+      ) + diag(diag_c, n_states[i])
+      if (left_right) x[lower.tri(x)] <- 0
+      probs[[i]] <- x / rowSums(x)
+    }
+  }
+  probs
+}
+#' @export
+#' @rdname simulate_pars
+simulate_emission_probs <- function(n_states, n_symbols, n_clusters = 1, 
+                                    alpha = 1) {
+  n_channels <- length(n_symbols)
+  emiss <- vector("list", n_clusters)
+  n_states <- rep(n_states, length = n_clusters)
+  if (n_channels > 1) {
+    for (i in seq_len(n_clusters)) {
+      emiss[[i]] <- vector("list", n_channels)
+      for (j in seq_len(n_channels)) {
+        emiss[[i]][[j]] <- matrix(
+          rgamma(n_states[i] * n_symbols[j], alpha), n_states[i], n_symbols[j],
+          TRUE)
+        emiss[[i]][[j]] <- emiss[[i]][[j]] / rowSums(emiss[[i]][[j]])
+      }
+    }
+  } else {
+    for (i in seq_len(n_clusters)) {
+      emiss[[i]] <- matrix(
+        rgamma(n_states[i] * n_symbols), n_states[i], n_symbols, TRUE
+      )
+      emiss[[i]] <- emiss[[i]] / rowSums(emiss[[i]])
+    }
+  }
+  if (n_clusters == 1) {
+    emiss[[1]]
+  } else {
+    emiss
+  }
+}