diff --git a/NEWS.md b/NEWS.md index 2487e85..50935bf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # SANple 0.1.1 +* Updated the MCMC functions to take a `burn in` value as input; +* Some algorithms ran `nrep-1` iterations. Updated to `nrep`; * Improved efficiency of stick-breaking computation; * Improved the initialization of the algorithms, streamlined some scripts; * Changed `.cpp` `for loops` indexes from `int` to `unsigned int` when needed; diff --git a/R/RcppExports.R b/R/RcppExports.R index 8aa60ac..c9c4c3a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,23 +1,23 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -sample_cam_arma <- function(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar) { - .Call(`_SANple_sample_cam_arma`, nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar) +sample_cam_burn <- function(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar) { + .Call(`_SANple_sample_cam_burn`, nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar) } -sample_fcam_arma <- function(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) { - .Call(`_SANple_sample_fcam_arma`, nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) +sample_fcam_burn <- function(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) { + .Call(`_SANple_sample_fcam_burn`, nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) } -sample_ficam_arma <- function(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) { - .Call(`_SANple_sample_ficam_arma`, nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) +sample_ficam_burn <- function(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) { + .Call(`_SANple_sample_ficam_burn`, nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) } -sample_overcam_arma <- function(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) { - .Call(`_SANple_sample_overcam_arma`, nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) +sample_overcam_burn <- function(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) { + .Call(`_SANple_sample_overcam_burn`, nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar) } -sample_overficam_arma <- function(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) { - .Call(`_SANple_sample_overficam_arma`, nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) +sample_overficam_burn <- function(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) { + .Call(`_SANple_sample_overficam_burn`, nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar) } diff --git a/R/estimate_clusters.R b/R/estimate_clusters.R index f735917..3f06c00 100644 --- a/R/estimate_clusters.R +++ b/R/estimate_clusters.R @@ -19,7 +19,8 @@ #' set.seed(123) #' y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) #' g <- c(rep(1,30), rep(2, 30)) -#' out <- sample_fiSAN(nrep = 500, y = y, group = g, +#' out <- sample_fiSAN(nrep = 500, burn = 200, +#' y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' beta = 1) @@ -28,12 +29,19 @@ #' @export #' @importFrom salso salso #' @useDynLib SANple -estimate_clusters <- function(object, burnin = NULL, ncores = 0) +estimate_clusters <- function(object, burnin = 0, ncores = 0) { - if(is.null(burnin)) { burnin <- 1:round(object$params$nrep/3*2) } - estimated_oc <- suppressWarnings(salso::salso(object$sim$obs_cluster[-burnin,], nCores = ncores)) - estimated_dc <- suppressWarnings(salso::salso(object$sim$distr_cluster[-burnin,], nCores = ncores)) + if(burnin>0) { + OC <- object$sim$obs_cluster[-burnin,] + DC <- object$sim$distr_cluster[-burnin,] + }else{ + OC <- object$sim$obs_cluster + DC <- object$sim$distr_cluster + } + + estimated_oc <- suppressWarnings(salso::salso(OC, nCores = ncores)) + estimated_dc <- suppressWarnings(salso::salso(DC, nCores = ncores)) n_oc <- length(unique(estimated_oc)) n_dc <- length(unique(estimated_dc)) diff --git a/R/mcmc_CAM.R b/R/mcmc_CAM_burn.R similarity index 90% rename from R/mcmc_CAM.R rename to R/mcmc_CAM_burn.R index f5bc5e7..daf5d4b 100644 --- a/R/mcmc_CAM.R +++ b/R/mcmc_CAM_burn.R @@ -5,7 +5,7 @@ #' The implemented algorithm is based on the nested slice sampler of Denti et al. (2023), based on the algorithm of Kalli, Griffin and Walker (2011). #' #' @usage -#' sample_CAM(nrep, y, group, +#' sample_CAM(nrep, burn, y, group, #' maxK = 50, maxL = 50, #' m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, #' hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -18,6 +18,7 @@ #' progress = TRUE, seed = NULL) #' #' @param nrep Number of MCMC iterations. +#' @param burn Number of discarded iterations. #' @param y Vector of observations. #' @param group Vector of the same length of y indicating the group membership (numeric). #' @param maxK Maximum number of distributional clusters (default = 50). @@ -127,7 +128,7 @@ #' g <- c(rep(1,30), rep(2, 30)) #' plot(density(y[g==1]), xlim = c(-5,10)) #' lines(density(y[g==2]), col = 2) -#' out <- sample_CAM(nrep = 500, y = y, group = g, +#' out <- sample_CAM(nrep = 500, burn = 200, y = y, group = g, #' nclus_start = 2, #' maxL = 20, maxK = 20) #' out @@ -143,7 +144,7 @@ #' @export sample_CAM #' #' @importFrom stats cor var dist hclust cutree rgamma -sample_CAM = function(nrep, y, group, +sample_CAM = function(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, @@ -163,13 +164,13 @@ sample_CAM = function(nrep, y, group, set.seed(seed) params <- list(nrep = nrep, - y = y, - group = group+1, - maxK = maxK, - maxL = maxL, - m0 = m0, tau0 = tau0, - lambda0 = lambda0, gamma0 = gamma0, - seed = seed) + y = y, + group = group+1, + maxK = maxK, + maxL = maxL, + m0 = m0, tau0 = tau0, + lambda0 = lambda0, gamma0 = gamma0, + seed = seed) if(!is.null(alpha)) { params$alpha <- alpha } if(!is.null(beta)) { params$beta <- beta } @@ -177,7 +178,7 @@ sample_CAM = function(nrep, y, group, if(is.null(alpha)) { params$hyp_alpha2 <- hyp_alpha2 } if(is.null(beta)) { params$hyp_beta1 <- hyp_beta1 } if(is.null(beta)) { params$hyp_beta2 <- hyp_beta2 } - + if(is.null(S_start)) { S_start <- rep(0,length(unique(group))) } # if the initial cluster allocation is passed @@ -207,9 +208,9 @@ sample_CAM = function(nrep, y, group, if(is.null(nclus_start)) { nclus_start = min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers = nclus_start, - algorithm="MacQueen", - iter.max = 50)$cluster + centers = nclus_start, + algorithm="MacQueen", + iter.max = 50)$cluster # if the initial cluster allocation is not passed # and you want a warmstart @@ -223,7 +224,7 @@ sample_CAM = function(nrep, y, group, sigma2_start[1:nclus_start][sigma2_start[1:nclus_start]==0] <- 0.001 sigma2_start[is.na(sigma2_start)] <- 0.001 } - + # if the initial cluster allocation is not passed # and you don't want a warmstart if(!warmstart){ @@ -234,7 +235,7 @@ sample_CAM = function(nrep, y, group, sigma2_start[1] <- var(y)/2 } } - + M_start <- M_start-1 sigma2_start[is.na(sigma2_start)] <- 0.001 @@ -245,12 +246,16 @@ sample_CAM = function(nrep, y, group, fixed_alpha <- F fixed_beta <- F if(!is.null(alpha) ) { - fixed_alpha <- T } else { alpha <- 1 } + fixed_alpha <- T ; + alpha_start <- alpha + } else { alpha <- 1 } if(!is.null(beta) ) { - fixed_beta <- T } else { beta <- 1} + beta_start <- beta + fixed_beta <- T ; + eps_beta <- 1 } else { beta <- 1 } start = Sys.time() - out = sample_cam_arma(nrep, y, group, + out = sample_cam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, @@ -262,7 +267,7 @@ sample_CAM = function(nrep, y, group, M_start, S_start, alpha_start, beta_start, progress - ) + ) end = Sys.time() warnings <- out$warnings @@ -273,34 +278,34 @@ sample_CAM = function(nrep, y, group, if(length(warnings) == 2) { output <- list( "model" = "CAM", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL and maxK: all the provided mixture components were used. Check $warnings to see when it happened.") } else if (length(warnings) == 1) { if((length(warnings$top_maxK)>0) & (length(warnings$top_maxL)==0)) { output <- list( "model" = "CAM", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxK: all the provided distributional mixture components were used. Check '$warnings' to see when it happened.") } if((length(warnings$top_maxK)==0) & (length(warnings$top_maxL)>0)) { output <- list( "model" = "CAM", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL: all the provided observational mixture components were used. Check '$warnings' to see when it happened.") } } else { output <- list( "model" = "CAM", - "params" = params, - "sim" = out, - "time" = end - start ) + "params" = params, + "sim" = out, + "time" = end - start ) } structure(output, class = c("SANmcmc",class(output))) diff --git a/R/mcmc_fSAN.R b/R/mcmc_fSAN_burn.R similarity index 83% rename from R/mcmc_fSAN.R rename to R/mcmc_fSAN_burn.R index 3b77225..590620c 100644 --- a/R/mcmc_fSAN.R +++ b/R/mcmc_fSAN_burn.R @@ -5,7 +5,7 @@ #' #' #' @usage -#' sample_fSAN(nrep, y, group, +#' sample_fSAN(nrep, burn, y, group, #' maxK = 50, maxL = 50, #' m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, #' hyp_alpha1 = 6, hyp_alpha2 = 3, @@ -20,6 +20,7 @@ #' #' #' @param nrep Number of MCMC iterations. +#' @param burn Number of discarded iterations. #' @param y Vector of observations. #' @param group Vector of the same length of y indicating the group membership (numeric). #' @param maxK Maximum number of distributional clusters \eqn{K} (default = 50). @@ -130,7 +131,8 @@ #' g <- c(rep(1,30), rep(2, 30)) #' plot(density(y[g==1]), xlim = c(-5,10)) #' lines(density(y[g==2]), col = 2) -#' out <- sample_fSAN(nrep = 500, y = y, group = g, +#' out <- sample_fSAN(nrep = 500, burn = 200, +#' y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' alpha = 1, beta = 1) @@ -144,19 +146,19 @@ #' #' @export sample_fSAN #' @importFrom stats cor var dist hclust cutree rgamma -sample_fSAN <- function(nrep, y, group, - maxK = 50, maxL = 50, - m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, - hyp_alpha1 = 6, hyp_alpha2 = 3, - hyp_beta1 = 6, hyp_beta2 = 3, - eps_alpha = NULL, eps_beta = NULL, - alpha = NULL, beta = NULL, - warmstart = TRUE, nclus_start = NULL, - mu_start = NULL, sigma2_start = NULL, - M_start = NULL, S_start = NULL, - alpha_start = NULL, beta_start = NULL, - progress = TRUE, - seed = NULL) +sample_fSAN <- function(nrep, burn, y, group, + maxK = 50, maxL = 50, + m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, + hyp_alpha1 = 6, hyp_alpha2 = 3, + hyp_beta1 = 6, hyp_beta2 = 3, + eps_alpha = NULL, eps_beta = NULL, + alpha = NULL, beta = NULL, + warmstart = TRUE, nclus_start = NULL, + mu_start = NULL, sigma2_start = NULL, + M_start = NULL, S_start = NULL, + alpha_start = NULL, beta_start = NULL, + progress = TRUE, + seed = NULL) { group <- .relabell(group) - 1 @@ -164,13 +166,13 @@ sample_fSAN <- function(nrep, y, group, set.seed(seed) params <- list(nrep = nrep, - y = y, - group = group+1, - maxK = maxK, - maxL = maxL, - m0 = m0, tau0 = tau0, - lambda0 = lambda0, gamma0 = gamma0, - seed = seed) + y = y, + group = group+1, + maxK = maxK, + maxL = maxL, + m0 = m0, tau0 = tau0, + lambda0 = lambda0, gamma0 = gamma0, + seed = seed) if(!is.null(alpha)) { params$alpha <- alpha } if(!is.null(beta)) { params$beta <- beta } @@ -213,7 +215,7 @@ sample_fSAN <- function(nrep, y, group, mu_start[1] <- mean(y) sigma2_start <- rep(0.001, maxL) sigma2_start[1] <- var(y)/2 - } + } # if the initial cluster allocation is not passed # and you want a warmstart @@ -223,9 +225,9 @@ sample_fSAN <- function(nrep, y, group, if(is.null(nclus_start)) { nclus_start = min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers = nclus_start, - algorithm="MacQueen", - iter.max = 50)$cluster + centers = nclus_start, + algorithm="MacQueen", + iter.max = 50)$cluster nclus_start <- length(unique(M_start)) mu_start[1:nclus_start] <- sapply(1:nclus_start, function(x) mean(y[M_start == x])) @@ -243,27 +245,32 @@ sample_fSAN <- function(nrep, y, group, fixed_alpha <- F fixed_beta <- F if(!is.null(alpha) ) { - fixed_alpha <- T ; eps_alpha <- 1 } else { alpha <- 1 } + fixed_alpha <- T ; + alpha_start <- alpha ; + eps_alpha <- 1 + } else { alpha <- 1 } if(!is.null(beta) ) { - fixed_beta <- T ; eps_beta <- 1 } else { beta <- 1 } + beta_start <- beta + fixed_beta <- T ; + eps_beta <- 1 } else { beta <- 1 } if((fixed_alpha == F)&(is.null(eps_alpha))) {stop("Missing eps parameter for MH step on alpha. Please provide 'eps_alpha' or a fixed 'alpha' value.")} if((fixed_beta == F)&(is.null(eps_beta))) {stop("Missing eps parameter for MH step on beta Please provide 'eps_beta' or a fixed 'beta' value.")} start <- Sys.time() - out <- sample_fcam_arma(nrep, y, group, - maxK, maxL, - m0, tau0, - lambda0, gamma0, - fixed_alpha, fixed_beta, - alpha, beta, - hyp_alpha1, hyp_alpha2, - hyp_beta1, hyp_beta2, - mu_start, sigma2_start, - M_start, S_start, - alpha_start, beta_start, - eps_alpha, eps_beta, - progress) + out <- sample_fcam_burn(nrep, burn, y, group, + maxK, maxL, + m0, tau0, + lambda0, gamma0, + fixed_alpha, fixed_beta, + alpha, beta, + hyp_alpha1, hyp_alpha2, + hyp_beta1, hyp_beta2, + mu_start, sigma2_start, + M_start, S_start, + alpha_start, beta_start, + eps_alpha, eps_beta, + progress) end <- Sys.time() warnings <- out$warnings @@ -275,36 +282,36 @@ sample_fSAN <- function(nrep, y, group, if(length(warnings) == 2) { output <- list( "model" = "fSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL and maxK: all the provided mixture components were used. Check $warnings to see when it happened.") } else if (length(warnings) == 1) { if((length(warnings$top_maxK)>0) & (length(warnings$top_maxL)==0)) { output <- list( "model" = "fSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxK: all the provided distributional mixture components were used. Check '$warnings' to see when it happened.") } if((length(warnings$top_maxK)==0) & (length(warnings$top_maxL)>0)) { output <- list( "model" = "fSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL: all the provided observational mixture components were used. Check '$warnings' to see when it happened.") } } else { output <- list( "model" = "fSAN", - "params" = params, - "sim" = out, - "time" = end - start ) + "params" = params, + "sim" = out, + "time" = end - start ) } structure(output, class = c("SANmcmc",class(output))) - + } diff --git a/R/mcmc_fSAN_sparsemix.R b/R/mcmc_fSAN_sparse_burn.R similarity index 84% rename from R/mcmc_fSAN_sparsemix.R rename to R/mcmc_fSAN_sparse_burn.R index ab2e8ae..9649b29 100644 --- a/R/mcmc_fSAN_sparsemix.R +++ b/R/mcmc_fSAN_sparse_burn.R @@ -5,7 +5,7 @@ #' #' #' @usage -#' sample_fSAN_sparsemix(nrep, y, group, +#' sample_fSAN_sparsemix(nrep, burn, y, group, #' maxK = 50, maxL = 50, #' m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, #' hyp_alpha = 10, hyp_beta = 10, @@ -19,6 +19,7 @@ #' #' #' @param nrep Number of MCMC iterations. +#' @param burn Number of discarded iterations. #' @param y Vector of observations. #' @param group Vector of the same length of y indicating the group membership (numeric). #' @param maxK Maximum number of distributional clusters \eqn{K} (default = 50). @@ -125,7 +126,7 @@ #' g <- c(rep(1,30), rep(2, 30)) #' plot(density(y[g==1]), xlim = c(-5,10)) #' lines(density(y[g==2]), col = 2) -#' out <- sample_fSAN_sparsemix(nrep = 500, y = y, group = g, +#' out <- sample_fSAN_sparsemix(nrep = 500, burn = 200, y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' alpha = 0.01, beta = 0.01) @@ -139,18 +140,18 @@ #' #' @export sample_fSAN_sparsemix #' @importFrom stats cor var dist hclust cutree rgamma -sample_fSAN_sparsemix <- function(nrep, y, group, - maxK = 50, maxL = 50, - m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, - hyp_alpha = 10, hyp_beta = 10, - eps_alpha = NULL, eps_beta = NULL, - alpha = NULL, beta = NULL, - warmstart = TRUE, nclus_start = NULL, - mu_start = NULL, sigma2_start = NULL, - M_start = NULL, S_start = NULL, - alpha_start = NULL, beta_start = NULL, - progress = TRUE, - seed = NULL) +sample_fSAN_sparsemix <- function(nrep, burn, y, group, + maxK = 50, maxL = 50, + m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, + hyp_alpha = 10, hyp_beta = 10, + eps_alpha = NULL, eps_beta = NULL, + alpha = NULL, beta = NULL, + warmstart = TRUE, nclus_start = NULL, + mu_start = NULL, sigma2_start = NULL, + M_start = NULL, S_start = NULL, + alpha_start = NULL, beta_start = NULL, + progress = TRUE, + seed = NULL) { group <- .relabell(group) - 1 @@ -158,13 +159,13 @@ sample_fSAN_sparsemix <- function(nrep, y, group, set.seed(seed) params <- list(nrep = nrep, - y = y, - group = group+1, - maxK = maxK, - maxL = maxL, - m0 = m0, tau0 = tau0, - lambda0 = lambda0, gamma0 = gamma0, - seed = seed) + y = y, + group = group+1, + maxK = maxK, + maxL = maxL, + m0 = m0, tau0 = tau0, + lambda0 = lambda0, gamma0 = gamma0, + seed = seed) if(!is.null(alpha)) { params$alpha <- alpha } if(!is.null(beta)) { params$beta <- beta } @@ -215,9 +216,9 @@ sample_fSAN_sparsemix <- function(nrep, y, group, if(is.null(nclus_start)) { nclus_start <- min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers = nclus_start, - algorithm="MacQueen", - iter.max = 50)$cluster + centers = nclus_start, + algorithm="MacQueen", + iter.max = 50)$cluster nclus_start <- length(unique(M_start)) mu_start[1:nclus_start] <- sapply(1:nclus_start, function(x) mean(y[M_start == x])) @@ -234,37 +235,45 @@ sample_fSAN_sparsemix <- function(nrep, y, group, fixed_alpha <- F fixed_beta <- F + if(!is.null(alpha) ) { - fixed_alpha <- T ; eps_alpha <- 1 } else { alpha <- 1 } + fixed_alpha <- T ; + alpha_start <- alpha + eps_alpha <- 1 + } else { alpha <- 1 } + if(!is.null(beta) ) { - fixed_beta <- T ; eps_beta <- 1 } else { beta <- 1 } + beta_start <- beta + fixed_beta <- T ; + eps_beta <- 1 + } else { beta <- 1 } if((fixed_alpha == F)&(is.null(eps_alpha))) {stop("Missing eps parameter for MH step on alpha. Please provide 'eps_alpha' or a fixed 'alpha' value.")} if((fixed_beta == F)&(is.null(eps_beta))) {stop("Missing eps parameter for MH step on beta Please provide 'eps_beta' or a fixed 'beta' value.")} - + start <- Sys.time() - out <- sample_overcam_arma(nrep, y, group, - maxK, maxL, - m0, tau0, - lambda0, gamma0, - fixed_alpha, fixed_beta, - alpha, beta, - hyp_alpha, hyp_beta, - mu_start, sigma2_start, - M_start, S_start, - alpha_start, beta_start, - eps_alpha, eps_beta, - progress) + out <- sample_overcam_burn(nrep, burn, y, group, + maxK, maxL, + m0, tau0, + lambda0, gamma0, + fixed_alpha, fixed_beta, + alpha, beta, + hyp_alpha, hyp_beta, + mu_start, sigma2_start, + M_start, S_start, + alpha_start, beta_start, + eps_alpha, eps_beta, + progress) end <- Sys.time() out$distr_cluster <- (out$distr_cluster + 1) out$obs_cluster <- (out$obs_cluster + 1) output <- list( "model" = "fSAN_sparsemix", - "params" = params, - "sim" = out, - "time" = end - start) + "params" = params, + "sim" = out, + "time" = end - start) structure(output, class = c("SANmcmc",class(output))) - + } diff --git a/R/mcmc_fiSAN.R b/R/mcmc_fiSAN_burn.R similarity index 87% rename from R/mcmc_fiSAN.R rename to R/mcmc_fiSAN_burn.R index d983a02..ab2303a 100644 --- a/R/mcmc_fiSAN.R +++ b/R/mcmc_fiSAN_burn.R @@ -7,7 +7,7 @@ #' #' #' @usage -#' sample_fiSAN(nrep, y, group, +#' sample_fiSAN(nrep, burn, y, group, #' maxK = 50, maxL = 50, #' m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, #' hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -21,6 +21,7 @@ #' progress = TRUE, seed = NULL) #' #' @param nrep Number of MCMC iterations. +#' @param burn Number of discarded iterations. #' @param y Vector of observations. #' @param group Vector of the same length of y indicating the group membership (numeric). #' @param maxK Maximum number of distributional clusters \eqn{K} (default = 50). @@ -130,7 +131,7 @@ #' g <- c(rep(1,30), rep(2, 30)) #' plot(density(y[g==1]), xlim = c(-5,10)) #' lines(density(y[g==2]), col = 2) -#' out <- sample_fiSAN(nrep = 500, y = y, group = g, +#' out <- sample_fiSAN(nrep = 500, burn = 200, y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' beta = 1) @@ -147,19 +148,19 @@ #' #' @export sample_fiSAN #' @importFrom stats cor var dist hclust cutree rgamma -sample_fiSAN <- function(nrep, y, group, - maxK = 50, maxL = 50, - m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, - hyp_alpha1 = 1, hyp_alpha2 = 1, - hyp_beta1 = 6, hyp_beta2 = 3, - eps_beta = NULL, - alpha = NULL, beta = NULL, - warmstart = TRUE, nclus_start = NULL, - mu_start = NULL, sigma2_start = NULL, - M_start = NULL, S_start = NULL, - alpha_start = NULL, beta_start = NULL, - progress = TRUE, - seed = NULL) +sample_fiSAN <- function(nrep, burn, y, group, + maxK = 50, maxL = 50, + m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, + hyp_alpha1 = 1, hyp_alpha2 = 1, + hyp_beta1 = 6, hyp_beta2 = 3, + eps_beta = NULL, + alpha = NULL, beta = NULL, + warmstart = TRUE, nclus_start = NULL, + mu_start = NULL, sigma2_start = NULL, + M_start = NULL, S_start = NULL, + alpha_start = NULL, beta_start = NULL, + progress = TRUE, + seed = NULL) { group <- .relabell(group) - 1 @@ -167,13 +168,13 @@ sample_fiSAN <- function(nrep, y, group, set.seed(seed) params <- list(nrep = nrep, - y = y, - group = group+1, - maxK = maxK, - maxL = maxL, - m0 = m0, tau0 = tau0, - lambda0 = lambda0, gamma0 = gamma0, - seed = seed) + y = y, + group = group+1, + maxK = maxK, + maxL = maxL, + m0 = m0, tau0 = tau0, + lambda0 = lambda0, gamma0 = gamma0, + seed = seed) if(!is.null(alpha)) { params$alpha <- alpha } if(!is.null(beta)) { params$beta <- beta } @@ -226,9 +227,9 @@ sample_fiSAN <- function(nrep, y, group, if(is.null(nclus_start)) { nclus_start <- min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers = nclus_start, - algorithm="MacQueen", - iter.max = 50)$cluster + centers = nclus_start, + algorithm="MacQueen", + iter.max = 50)$cluster nclus_start <- length(unique(M_start)) mu_start[1:nclus_start] <- sapply(1:nclus_start, function(x) mean(y[M_start == x])) @@ -246,14 +247,19 @@ sample_fiSAN <- function(nrep, y, group, fixed_alpha <- F fixed_beta <- F if(!is.null(alpha) ) { - fixed_alpha <- T ; } else { alpha <- 1 } + fixed_alpha <- T ; + alpha_start <- alpha + eps_alpha <- 1 + } else { alpha <- 1 } if(!is.null(beta) ) { - fixed_beta <- T ; eps_beta <- 1 } else { beta <- 1 } + beta_start <- beta + fixed_beta <- T ; + eps_beta <- 1 } else { beta <- 1 } if((fixed_beta == F)&(is.null(eps_beta))) {stop("Missing eps parameter for MH step on beta Please provide 'eps_beta' or a fixed 'beta' value.")} start <- Sys.time() - out <- sample_ficam_arma(nrep, y, group, + out <- sample_ficam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, @@ -277,34 +283,34 @@ sample_fiSAN <- function(nrep, y, group, if(length(warnings) == 2) { output <- list( "model" = "fiSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL and maxK: all the provided mixture components were used. Check $warnings to see when it happened.") } else if (length(warnings) == 1) { if((length(warnings$top_maxK)>0) & (length(warnings$top_maxL)==0)) { output <- list( "model" = "fiSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxK: all the provided distributional mixture components were used. Check '$warnings' to see when it happened.") } if((length(warnings$top_maxK)==0) & (length(warnings$top_maxL)>0)) { output <- list( "model" = "fiSAN", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxL: all the provided observational mixture components were used. Check '$warnings' to see when it happened.") } } else { output <- list( "model" = "fiSAN", - "params" = params, - "sim" = out, - "time" = end - start ) + "params" = params, + "sim" = out, + "time" = end - start ) } structure(output, class = c("SANmcmc", class(output))) diff --git a/R/mcmc_fiSAN_sparsemix.R b/R/mcmc_fiSAN_sparse_burn.R similarity index 83% rename from R/mcmc_fiSAN_sparsemix.R rename to R/mcmc_fiSAN_sparse_burn.R index d2cb5ee..7a9a114 100644 --- a/R/mcmc_fiSAN_sparsemix.R +++ b/R/mcmc_fiSAN_sparse_burn.R @@ -6,7 +6,7 @@ #' The algorithm for the nonparametric component is based on the slice sampler for DPM of Kalli, Griffin and Walker (2011). #' #' @usage -#' sample_fiSAN_sparsemix(nrep, y, group, +#' sample_fiSAN_sparsemix(nrep, burn, y, group, #' maxK = 50, maxL = 50, #' m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, #' hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -21,6 +21,7 @@ #' #' #' @param nrep Number of MCMC iterations. +#' @param burn Number of discarded iterations. #' @param y Vector of observations. #' @param group Vector of the same length of y indicating the group membership (numeric). #' @param maxK Maximum number of distributional clusters \eqn{K} (default = 50). @@ -129,7 +130,7 @@ #' g <- c(rep(1,30), rep(2, 30)) #' plot(density(y[g==1]), xlim = c(-5,10)) #' lines(density(y[g==2]), col = 2) -#' out <- sample_fiSAN_sparsemix(nrep = 500, y = y, group = g, +#' out <- sample_fiSAN_sparsemix(nrep = 500, burn = 200, y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' beta = 0.01) @@ -145,19 +146,19 @@ #' #' @export sample_fiSAN_sparsemix #' @importFrom stats cor var dist hclust cutree rgamma -sample_fiSAN_sparsemix <- function(nrep, y, group, - maxK = 50, maxL = 50, - m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, - hyp_alpha1 = 1, hyp_alpha2 = 1, - hyp_beta = 10, - eps_beta = NULL, - alpha = NULL, beta = NULL, - warmstart = TRUE, nclus_start = NULL, - mu_start = NULL, sigma2_start = NULL, - M_start = NULL, S_start = NULL, - alpha_start = NULL, beta_start = NULL, - progress = TRUE, - seed = NULL) +sample_fiSAN_sparsemix <- function(nrep, burn, y, group, + maxK = 50, maxL = 50, + m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, + hyp_alpha1 = 1, hyp_alpha2 = 1, + hyp_beta = 10, + eps_beta = NULL, + alpha = NULL, beta = NULL, + warmstart = TRUE, nclus_start = NULL, + mu_start = NULL, sigma2_start = NULL, + M_start = NULL, S_start = NULL, + alpha_start = NULL, beta_start = NULL, + progress = TRUE, + seed = NULL) { group <- .relabell(group) - 1 @@ -165,13 +166,13 @@ sample_fiSAN_sparsemix <- function(nrep, y, group, set.seed(seed) params <- list(nrep = nrep, - y = y, - group = group+1, - maxK = maxK, - maxL = maxL, - m0 = m0, tau0 = tau0, - lambda0 = lambda0, gamma0 = gamma0, - seed = seed) + y = y, + group = group+1, + maxK = maxK, + maxL = maxL, + m0 = m0, tau0 = tau0, + lambda0 = lambda0, gamma0 = gamma0, + seed = seed) if(!is.null(alpha)) { params$alpha <- alpha } if(!is.null(beta)) { params$beta <- beta } @@ -223,9 +224,9 @@ sample_fiSAN_sparsemix <- function(nrep, y, group, if(is.null(nclus_start)) { nclus_start <- min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers = nclus_start, - algorithm="MacQueen", - iter.max = 50)$cluster + centers = nclus_start, + algorithm="MacQueen", + iter.max = 500)$cluster nclus_start <- length(unique(M_start)) mu_start[1:nclus_start] <- sapply(1:nclus_start, function(x) mean(y[M_start == x])) @@ -243,28 +244,33 @@ sample_fiSAN_sparsemix <- function(nrep, y, group, fixed_alpha <- F fixed_beta <- F if(!is.null(alpha) ) { - fixed_alpha <- T } else { alpha <- 1 } + fixed_alpha <- T ; + alpha_start <- alpha + } else { alpha <- 1 } + if(!is.null(beta) ) { - fixed_beta <- T ; eps_beta <- 99 } else { beta <- 1 } + beta_start <- beta + fixed_beta <- T ; + eps_beta <- 1 } else { beta <- 1 } if((fixed_beta == F)&(is.null(eps_beta))) {stop("Missing eps parameter for MH step on beta Please provide 'eps_beta' or a fixed 'beta' value.")} - + start <- Sys.time() - out <- sample_overficam_arma(nrep, y, group, - maxK, maxL, - m0, tau0, - lambda0, gamma0, - fixed_alpha, fixed_beta, - alpha, beta, - hyp_alpha1, hyp_alpha2, - hyp_beta, - mu_start, sigma2_start, - M_start, S_start, - alpha_start, beta_start, - eps_beta, - progress) + out <- sample_overficam_burn(nrep, burn, y, group, + maxK, maxL, + m0, tau0, + lambda0, gamma0, + fixed_alpha, fixed_beta, + alpha, beta, + hyp_alpha1, hyp_alpha2, + hyp_beta, + mu_start, sigma2_start, + M_start, S_start, + alpha_start, beta_start, + eps_beta, + progress) end <- Sys.time() warnings <- out$warnings @@ -275,16 +281,16 @@ sample_fiSAN_sparsemix <- function(nrep, y, group, if(length(warnings) == 1) { output <- list( "model" = "fiSAN_sparsemix", - "params" = params, - "sim" = out, - "time" = end - start, - "warnings" = warnings) + "params" = params, + "sim" = out, + "time" = end - start, + "warnings" = warnings) warning("Increase maxK: all the provided distributional mixture components were used. Check '$warnings' to see when it happened.") } else { output <- list( "model" = "fiSAN_sparsemix", - "params" = params, - "sim" = out, - "time" = end - start ) + "params" = params, + "sim" = out, + "time" = end - start ) } structure(output, class = c("SANmcmc",class(output))) diff --git a/R/plot.R b/R/plot.R index 12b1ccc..59b19fb 100644 --- a/R/plot.R +++ b/R/plot.R @@ -21,7 +21,8 @@ #' set.seed(123) #' y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) #' g <- c(rep(1,30), rep(2, 30)) -#' out <- sample_fiSAN(nrep = 500, y = y, group = g, +#' out <- sample_fiSAN(nrep = 500, burn = 200, +#' y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' beta = 1) @@ -36,23 +37,24 @@ plot.SANmcmc <- function(x, type = c("boxplot", "ecdf", "scatter"), estimated_clusters = NULL, - burnin = NULL, - palette_brewed = FALSE, ncores = 0, ...) + burnin = 0, + palette_brewed = FALSE, ncores = 1, ...) { type <- match.arg(type) if(!is.null(estimated_clusters)) { - burnin <- 1:(x$params$nrep - nrow(attr(estimated_clusters$est_oc, "draws"))) estimated_oc <- estimated_clusters$est_oc estimated_dc <- estimated_clusters$est_dc - } else { - if(is.null(burnin)) { burnin <- 1:round(x$params$nrep/3*2) - } else { burnin <- 1:burnin } - } - - if(is.null(estimated_clusters)) { - estimated_oc <- suppressWarnings(salso::salso(x$sim$obs_cluster[-burnin,], nCores = ncores)) - estimated_dc <- suppressWarnings(salso::salso(x$sim$distr_cluster[-burnin,], nCores = ncores)) + } else if(is.null(estimated_clusters)) { + if(burnin>0) { + OC <- x$sim$obs_cluster[-burnin,] + DC <- x$sim$distr_cluster[-burnin,] + }else{ + OC <- x$sim$obs_cluster + DC <- x$sim$distr_cluster + } + estimated_oc <- suppressWarnings(salso::salso(OC, nCores = ncores)) + estimated_dc <- suppressWarnings(salso::salso(DC, nCores = ncores)) } posterior_means <- tapply(x$params$y, estimated_oc, mean) diff --git a/R/traceplot.R b/R/traceplot.R index 70b301f..795acd4 100644 --- a/R/traceplot.R +++ b/R/traceplot.R @@ -27,7 +27,8 @@ #' set.seed(123) #' y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) #' g <- c(rep(1,30), rep(2, 30)) -#' out <- sample_fiSAN(nrep = 500, y = y, group = g, +#' out <- sample_fiSAN(nrep = 500, burn = 200, +#' y = y, group = g, #' nclus_start = 2, #' maxK = 20, maxL = 20, #' beta = 1) diff --git a/README.Rmd b/README.Rmd index 25b37dd..c13da3c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -48,9 +48,9 @@ y <- c(rnorm(50,-5,1), rnorm(170,0,1),rnorm(70,5,1)) g <- c(rep(1,150), rep(2, 140)) plot(density(y[g==1]), xlim = c(-10,10), main = "", xlab = "") lines(density(y[g==2]), col = "cyan4") -out <- sample_fiSAN(nrep = 3000, y = y, group = g, beta = 1) +out <- sample_fiSAN(nrep = 3000, burn = 1000, y = y, group = g, beta = 1) out -clusters <- estimate_clusters(out, burnin = 2000) +clusters <- estimate_clusters(out) clusters plot(out, estimated_clusters = clusters) ``` diff --git a/cran-comments.md b/cran-comments.md index 6e260a0..60d6785 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,11 +1,13 @@ ## SANple 0.1.1 -In this release - -* we improved efficiency of stick-breaking computation; -* we improved initialization of the algorithms, and streamlined some scripts; -* we changed cpp `for loops` indexes from int to unsigned int when needed; -* we fixed a bug in the full conditional of the concentration parameter for the observational DP of CAM. +In this release we + +* Updated the MCMC functions to take a `burn in` value as input; +* Some algorithms ran `nrep-1` iterations. Updated to `nrep`; +* Improved efficiency of stick-breaking computation; +* Improved the initialization of the algorithms, streamlined some scripts; +* Changed `.cpp` `for loops` indexes from `int` to `unsigned int` when needed; +* Fixed a bug in the full conditional of the concentration parameter for the observational DP of CAM. ## R CMD check results diff --git a/man/estimate_clusters.Rd b/man/estimate_clusters.Rd index e0255f1..719bb04 100644 --- a/man/estimate_clusters.Rd +++ b/man/estimate_clusters.Rd @@ -4,7 +4,7 @@ \alias{estimate_clusters} \title{Estimate observational and distributional clusters} \usage{ -estimate_clusters(object, burnin = NULL, ncores = 0) +estimate_clusters(object, burnin = 0, ncores = 0) } \arguments{ \item{object}{object of class \code{SANmcmc} (the result of a call to \code{\link{sample_fiSAN}}, \code{\link{sample_fiSAN_sparsemix}}, @@ -32,7 +32,8 @@ Given the MCMC output, estimate the observational and distributional partitions set.seed(123) y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) -out <- sample_fiSAN(nrep = 500, y = y, group = g, +out <- sample_fiSAN(nrep = 500, burn = 200, + y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, beta = 1) diff --git a/man/plot.SANmcmc.Rd b/man/plot.SANmcmc.Rd index 5df0457..2582505 100644 --- a/man/plot.SANmcmc.Rd +++ b/man/plot.SANmcmc.Rd @@ -8,9 +8,9 @@ x, type = c("boxplot", "ecdf", "scatter"), estimated_clusters = NULL, - burnin = NULL, + burnin = 0, palette_brewed = FALSE, - ncores = 0, + ncores = 1, ... ) } @@ -42,7 +42,8 @@ The function displays two graphs, meant to analyze the estimated distributional set.seed(123) y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) -out <- sample_fiSAN(nrep = 500, y = y, group = g, +out <- sample_fiSAN(nrep = 500, burn = 200, + y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, beta = 1) diff --git a/man/sample_CAM.Rd b/man/sample_CAM.Rd index 97a8786..09841ec 100644 --- a/man/sample_CAM.Rd +++ b/man/sample_CAM.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_CAM.R +% Please edit documentation in R/mcmc_CAM_burn.R \name{sample_CAM} \alias{sample_CAM} \title{Sample CAM} \usage{ -sample_CAM(nrep, y, group, +sample_CAM(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -19,6 +19,8 @@ sample_CAM(nrep, y, group, \arguments{ \item{nrep}{Number of MCMC iterations.} +\item{burn}{Number of discarded iterations.} + \item{y}{Vector of observations.} \item{group}{Vector of the same length of y indicating the group membership (numeric).} @@ -143,7 +145,7 @@ y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) plot(density(y[g==1]), xlim = c(-5,10)) lines(density(y[g==2]), col = 2) -out <- sample_CAM(nrep = 500, y = y, group = g, +out <- sample_CAM(nrep = 500, burn = 200, y = y, group = g, nclus_start = 2, maxL = 20, maxK = 20) out diff --git a/man/sample_fSAN.Rd b/man/sample_fSAN.Rd index 6b27959..1f96ce8 100644 --- a/man/sample_fSAN.Rd +++ b/man/sample_fSAN.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_fSAN.R +% Please edit documentation in R/mcmc_fSAN_burn.R \name{sample_fSAN} \alias{sample_fSAN} \title{Sample fSAN} \usage{ -sample_fSAN(nrep, y, group, +sample_fSAN(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, hyp_alpha1 = 6, hyp_alpha2 = 3, @@ -20,6 +20,8 @@ sample_fSAN(nrep, y, group, \arguments{ \item{nrep}{Number of MCMC iterations.} +\item{burn}{Number of discarded iterations.} + \item{y}{Vector of observations.} \item{group}{Vector of the same length of y indicating the group membership (numeric).} @@ -144,7 +146,8 @@ y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) plot(density(y[g==1]), xlim = c(-5,10)) lines(density(y[g==2]), col = 2) -out <- sample_fSAN(nrep = 500, y = y, group = g, +out <- sample_fSAN(nrep = 500, burn = 200, + y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, alpha = 1, beta = 1) diff --git a/man/sample_fSAN_sparsemix.Rd b/man/sample_fSAN_sparsemix.Rd index 4150e3b..c8342c3 100644 --- a/man/sample_fSAN_sparsemix.Rd +++ b/man/sample_fSAN_sparsemix.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_fSAN_sparsemix.R +% Please edit documentation in R/mcmc_fSAN_sparse_burn.R \name{sample_fSAN_sparsemix} \alias{sample_fSAN_sparsemix} \title{Sample fSAN with sparse mixtures} \usage{ -sample_fSAN_sparsemix(nrep, y, group, +sample_fSAN_sparsemix(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, hyp_alpha = 10, hyp_beta = 10, @@ -19,6 +19,8 @@ sample_fSAN_sparsemix(nrep, y, group, \arguments{ \item{nrep}{Number of MCMC iterations.} +\item{burn}{Number of discarded iterations.} + \item{y}{Vector of observations.} \item{group}{Vector of the same length of y indicating the group membership (numeric).} @@ -139,7 +141,7 @@ y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) plot(density(y[g==1]), xlim = c(-5,10)) lines(density(y[g==2]), col = 2) -out <- sample_fSAN_sparsemix(nrep = 500, y = y, group = g, +out <- sample_fSAN_sparsemix(nrep = 500, burn = 200, y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, alpha = 0.01, beta = 0.01) diff --git a/man/sample_fiSAN.Rd b/man/sample_fiSAN.Rd index 439c188..564ec72 100644 --- a/man/sample_fiSAN.Rd +++ b/man/sample_fiSAN.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_fiSAN.R +% Please edit documentation in R/mcmc_fiSAN_burn.R \name{sample_fiSAN} \alias{sample_fiSAN} \title{Sample fiSAN} \usage{ -sample_fiSAN(nrep, y, group, +sample_fiSAN(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -20,6 +20,8 @@ sample_fiSAN(nrep, y, group, \arguments{ \item{nrep}{Number of MCMC iterations.} +\item{burn}{Number of discarded iterations.} + \item{y}{Vector of observations.} \item{group}{Vector of the same length of y indicating the group membership (numeric).} @@ -146,7 +148,7 @@ y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) plot(density(y[g==1]), xlim = c(-5,10)) lines(density(y[g==2]), col = 2) -out <- sample_fiSAN(nrep = 500, y = y, group = g, +out <- sample_fiSAN(nrep = 500, burn = 200, y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, beta = 1) diff --git a/man/sample_fiSAN_sparsemix.Rd b/man/sample_fiSAN_sparsemix.Rd index 69d75f4..a7b3060 100644 --- a/man/sample_fiSAN_sparsemix.Rd +++ b/man/sample_fiSAN_sparsemix.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_fiSAN_sparsemix.R +% Please edit documentation in R/mcmc_fiSAN_sparse_burn.R \name{sample_fiSAN_sparsemix} \alias{sample_fiSAN_sparsemix} \title{Sample fiSAN with sparse mixtures} \usage{ -sample_fiSAN_sparsemix(nrep, y, group, +sample_fiSAN_sparsemix(nrep, burn, y, group, maxK = 50, maxL = 50, m0 = 0, tau0 = 0.1, lambda0 = 3, gamma0 = 2, hyp_alpha1 = 1, hyp_alpha2 = 1, @@ -20,6 +20,8 @@ sample_fiSAN_sparsemix(nrep, y, group, \arguments{ \item{nrep}{Number of MCMC iterations.} +\item{burn}{Number of discarded iterations.} + \item{y}{Vector of observations.} \item{group}{Vector of the same length of y indicating the group membership (numeric).} @@ -144,7 +146,7 @@ y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) plot(density(y[g==1]), xlim = c(-5,10)) lines(density(y[g==2]), col = 2) -out <- sample_fiSAN_sparsemix(nrep = 500, y = y, group = g, +out <- sample_fiSAN_sparsemix(nrep = 500, burn = 200, y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, beta = 0.01) diff --git a/man/traceplot.Rd b/man/traceplot.Rd index 0ec6f98..499a003 100644 --- a/man/traceplot.Rd +++ b/man/traceplot.Rd @@ -40,7 +40,8 @@ The function is not available for the observational weights \eqn{\omega}. set.seed(123) y <- c(rnorm(40,0,0.3), rnorm(20,5,0.3)) g <- c(rep(1,30), rep(2, 30)) -out <- sample_fiSAN(nrep = 500, y = y, group = g, +out <- sample_fiSAN(nrep = 500, burn = 200, + y = y, group = g, nclus_start = 2, maxK = 20, maxL = 20, beta = 1) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7d51c0a..6875e55 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,13 +11,14 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif -// sample_cam_arma -Rcpp::List sample_cam_arma(int nrep, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, bool progressbar); -RcppExport SEXP _SANple_sample_cam_arma(SEXP nrepSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP progressbarSEXP) { +// sample_cam_burn +Rcpp::List sample_cam_burn(int nrep, int burn, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, bool progressbar); +RcppExport SEXP _SANple_sample_cam_burn(SEXP nrepSEXP, SEXP burnSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP progressbarSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type nrep(nrepSEXP); + Rcpp::traits::input_parameter< int >::type burn(burnSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< const arma::vec& >::type group(groupSEXP); Rcpp::traits::input_parameter< int >::type maxK(maxKSEXP); @@ -41,17 +42,18 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type alpha_start(alpha_startSEXP); Rcpp::traits::input_parameter< double >::type beta_start(beta_startSEXP); Rcpp::traits::input_parameter< bool >::type progressbar(progressbarSEXP); - rcpp_result_gen = Rcpp::wrap(sample_cam_arma(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar)); + rcpp_result_gen = Rcpp::wrap(sample_cam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, progressbar)); return rcpp_result_gen; END_RCPP } -// sample_fcam_arma -Rcpp::List sample_fcam_arma(int nrep, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_alpha, double eps_beta, bool progressbar); -RcppExport SEXP _SANple_sample_fcam_arma(SEXP nrepSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_alphaSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { +// sample_fcam_burn +Rcpp::List sample_fcam_burn(int nrep, int burn, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_alpha, double eps_beta, bool progressbar); +RcppExport SEXP _SANple_sample_fcam_burn(SEXP nrepSEXP, SEXP burnSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_alphaSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type nrep(nrepSEXP); + Rcpp::traits::input_parameter< int >::type burn(burnSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< const arma::vec& >::type group(groupSEXP); Rcpp::traits::input_parameter< int >::type maxK(maxKSEXP); @@ -77,17 +79,18 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type eps_alpha(eps_alphaSEXP); Rcpp::traits::input_parameter< double >::type eps_beta(eps_betaSEXP); Rcpp::traits::input_parameter< bool >::type progressbar(progressbarSEXP); - rcpp_result_gen = Rcpp::wrap(sample_fcam_arma(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar)); + rcpp_result_gen = Rcpp::wrap(sample_fcam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar)); return rcpp_result_gen; END_RCPP } -// sample_ficam_arma -Rcpp::List sample_ficam_arma(int nrep, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_beta, bool progressbar); -RcppExport SEXP _SANple_sample_ficam_arma(SEXP nrepSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { +// sample_ficam_burn +Rcpp::List sample_ficam_burn(int nrep, int burn, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta1, double hyp_beta2, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_beta, bool progressbar); +RcppExport SEXP _SANple_sample_ficam_burn(SEXP nrepSEXP, SEXP burnSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_beta1SEXP, SEXP hyp_beta2SEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type nrep(nrepSEXP); + Rcpp::traits::input_parameter< int >::type burn(burnSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< const arma::vec& >::type group(groupSEXP); Rcpp::traits::input_parameter< int >::type maxK(maxKSEXP); @@ -112,17 +115,18 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type beta_start(beta_startSEXP); Rcpp::traits::input_parameter< double >::type eps_beta(eps_betaSEXP); Rcpp::traits::input_parameter< bool >::type progressbar(progressbarSEXP); - rcpp_result_gen = Rcpp::wrap(sample_ficam_arma(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar)); + rcpp_result_gen = Rcpp::wrap(sample_ficam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta1, hyp_beta2, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar)); return rcpp_result_gen; END_RCPP } -// sample_overcam_arma -Rcpp::List sample_overcam_arma(int nrep, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha, double hyp_beta, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_alpha, double eps_beta, bool progressbar); -RcppExport SEXP _SANple_sample_overcam_arma(SEXP nrepSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alphaSEXP, SEXP hyp_betaSEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_alphaSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { +// sample_overcam_burn +Rcpp::List sample_overcam_burn(int nrep, int burn, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha, double hyp_beta, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_alpha, double eps_beta, bool progressbar); +RcppExport SEXP _SANple_sample_overcam_burn(SEXP nrepSEXP, SEXP burnSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alphaSEXP, SEXP hyp_betaSEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_alphaSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type nrep(nrepSEXP); + Rcpp::traits::input_parameter< int >::type burn(burnSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< const arma::vec& >::type group(groupSEXP); Rcpp::traits::input_parameter< int >::type maxK(maxKSEXP); @@ -146,17 +150,18 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type eps_alpha(eps_alphaSEXP); Rcpp::traits::input_parameter< double >::type eps_beta(eps_betaSEXP); Rcpp::traits::input_parameter< bool >::type progressbar(progressbarSEXP); - rcpp_result_gen = Rcpp::wrap(sample_overcam_arma(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar)); + rcpp_result_gen = Rcpp::wrap(sample_overcam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_alpha, eps_beta, progressbar)); return rcpp_result_gen; END_RCPP } -// sample_overficam_arma -Rcpp::List sample_overficam_arma(int nrep, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_beta, bool progressbar); -RcppExport SEXP _SANple_sample_overficam_arma(SEXP nrepSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_betaSEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { +// sample_overficam_burn +Rcpp::List sample_overficam_burn(int nrep, int burn, const arma::vec& y, const arma::vec& group, int maxK, int maxL, double m0, double tau0, double lambda0, double gamma0, bool fixed_alpha, bool fixed_beta, double alpha, double beta, double hyp_alpha1, double hyp_alpha2, double hyp_beta, arma::vec mu_start, arma::vec sigma2_start, arma::vec M_start, arma::vec S_start, double alpha_start, double beta_start, double eps_beta, bool progressbar); +RcppExport SEXP _SANple_sample_overficam_burn(SEXP nrepSEXP, SEXP burnSEXP, SEXP ySEXP, SEXP groupSEXP, SEXP maxKSEXP, SEXP maxLSEXP, SEXP m0SEXP, SEXP tau0SEXP, SEXP lambda0SEXP, SEXP gamma0SEXP, SEXP fixed_alphaSEXP, SEXP fixed_betaSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP hyp_alpha1SEXP, SEXP hyp_alpha2SEXP, SEXP hyp_betaSEXP, SEXP mu_startSEXP, SEXP sigma2_startSEXP, SEXP M_startSEXP, SEXP S_startSEXP, SEXP alpha_startSEXP, SEXP beta_startSEXP, SEXP eps_betaSEXP, SEXP progressbarSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type nrep(nrepSEXP); + Rcpp::traits::input_parameter< int >::type burn(burnSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< const arma::vec& >::type group(groupSEXP); Rcpp::traits::input_parameter< int >::type maxK(maxKSEXP); @@ -180,17 +185,17 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type beta_start(beta_startSEXP); Rcpp::traits::input_parameter< double >::type eps_beta(eps_betaSEXP); Rcpp::traits::input_parameter< bool >::type progressbar(progressbarSEXP); - rcpp_result_gen = Rcpp::wrap(sample_overficam_arma(nrep, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar)); + rcpp_result_gen = Rcpp::wrap(sample_overficam_burn(nrep, burn, y, group, maxK, maxL, m0, tau0, lambda0, gamma0, fixed_alpha, fixed_beta, alpha, beta, hyp_alpha1, hyp_alpha2, hyp_beta, mu_start, sigma2_start, M_start, S_start, alpha_start, beta_start, eps_beta, progressbar)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_SANple_sample_cam_arma", (DL_FUNC) &_SANple_sample_cam_arma, 24}, - {"_SANple_sample_fcam_arma", (DL_FUNC) &_SANple_sample_fcam_arma, 26}, - {"_SANple_sample_ficam_arma", (DL_FUNC) &_SANple_sample_ficam_arma, 25}, - {"_SANple_sample_overcam_arma", (DL_FUNC) &_SANple_sample_overcam_arma, 24}, - {"_SANple_sample_overficam_arma", (DL_FUNC) &_SANple_sample_overficam_arma, 24}, + {"_SANple_sample_cam_burn", (DL_FUNC) &_SANple_sample_cam_burn, 25}, + {"_SANple_sample_fcam_burn", (DL_FUNC) &_SANple_sample_fcam_burn, 27}, + {"_SANple_sample_ficam_burn", (DL_FUNC) &_SANple_sample_ficam_burn, 26}, + {"_SANple_sample_overcam_burn", (DL_FUNC) &_SANple_sample_overcam_burn, 25}, + {"_SANple_sample_overficam_burn", (DL_FUNC) &_SANple_sample_overficam_burn, 25}, {NULL, NULL, 0} }; diff --git a/src/funs_cam.cpp b/src/funs_cam.cpp index 4c3a5cf..7402adb 100644 --- a/src/funs_cam.cpp +++ b/src/funs_cam.cpp @@ -34,6 +34,25 @@ arma::vec stick_breaking(arma::vec beta_var) return(out) ; } +/* older version - to be removed +arma::vec stick_breaking(arma::vec beta_var) +{ + int len = beta_var.n_elem ; + arma::vec out(len) ; + + out(0) = beta_var(0) ; + for(int k = 1; k < len; k++) + { + arma::vec tmp(k) ; + for(int t = 0; t < k; t++) { tmp(t) = log(1 - beta_var(t)) ; } + double tmp1 = arma::accu(tmp) ; + if(!arma::is_finite(tmp1)) { tmp1 = log(0) ;} + out(k) = exp(log(beta_var(k)) + tmp1) ; + } + return(out) ; +} + */ + // This function performs steps 1-4 of Algorithm 1 of Denti et al. (2021): // - sample observational and distributional slice variables, @@ -331,7 +350,7 @@ double sample_beta(double hyp_beta1, double hyp_beta2, bar_Ms(k) = subM.max() ; for(int l = 0; l < subM.max(); l++) { - sum_log_mbeta = sum_log_mbeta + log( 1-obs_beta_rv(l,k) ) ; + sum_log_mbeta += log( 1-obs_beta_rv(l,k) ) ; } } } diff --git a/src/main_cam.cpp b/src/main_cam.cpp deleted file mode 100644 index 4634371..0000000 --- a/src/main_cam.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include -#include "funs_cam.h" -// [[Rcpp::depends(RcppProgress)]] -#include -#include - -// [[Rcpp::export]] -Rcpp::List sample_cam_arma(int nrep, // number of replications of the Gibbs sampler - const arma::vec & y, // input data - const arma::vec & group, // group assignment for each observation in the vector y - int maxK, // maximum number of distributional clusters - for memory allocation - int maxL, // maximum number of observational clusters - for memory allocation - double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) - double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) - bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? - double alpha, double beta, // if alpha or/and beta fixed - double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) - if alpha not fixed - double hyp_beta1, double hyp_beta2,// hyperparameters of beta ( par of the observational DP ) - if beta not fixed - arma::vec mu_start, - arma::vec sigma2_start, - arma::vec M_start, - arma::vec S_start, - double alpha_start, - double beta_start, - bool progressbar - ) -{ - int N = y.n_elem ; - arma::vec unique_groups = arma::unique(group) ; - int G = unique_groups.n_elem ; - - // allocate output matrices - arma::mat mu(maxL, nrep, arma::fill::zeros) ; - arma::mat sigma2(maxL, nrep, arma::fill::ones) ; - arma::mat out_S(G, nrep) ; // distributional clusters - arma::mat out_M(N, nrep) ; // observational clusters - arma::mat pi(maxK, nrep, arma::fill::zeros) ; - arma::cube omega(maxL, maxK, nrep, arma::fill::zeros) ; - arma::vec out_alpha(nrep) ; // DP parameter for the distributional weights - arma::vec out_beta(nrep) ; // DP parameter for the observational weights - - // initialization - mu.col(0) = mu_start ; - sigma2.col(0) = sigma2_start ; - out_M.col(0) = M_start ; - out_S.col(0) = S_start ; - pi.col(0).fill(1/(maxK*1.0)) ; - omega.slice(0).fill(1/(maxL*1.0)) ; - - if(fixed_alpha) { - out_alpha.fill(alpha) ; - } else { - out_alpha(0) = alpha_start ; - } - - if(fixed_beta) { - out_beta.fill(beta) ; - } else { - out_beta(0) = beta_start ; - } - - - - // auxiliary quantities - arma::vec clusterD_long(N) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), 0 ) ; } - Rcpp::List out_params ; - - // slice variables - Rcpp::List weights_slice_sampler ; - arma::vec out_maxK(nrep) ; out_maxK(0) = maxK ; - arma::vec out_maxL(nrep) ; out_maxL(0) = maxL ; - arma::vec xi( std::max(maxL, maxK) ) ; - for(int l = 0; l < std::max(maxL, maxK); l++) { xi(l) = fun_xi(0.5, l+1) ; } - - bool warningL = false ; arma::vec iters_maxL(nrep-1) ; int countL = 0 ; - bool warningK = false ; arma::vec iters_maxK(nrep-1) ; int countK = 0 ; - - // progress bar - bool display_progress = progressbar ; - Progress p(nrep, display_progress) ; - - - /* START */ - for(int iter = 0; iter < nrep -1 ; iter++) - { - if( Progress::check_abort() ) { return -1.0 ; } - p.increment(); - - - /*---------------------------------------------*/ - /* - * UPDATE MIXTURE WEIGHTS - */ - weights_slice_sampler = weights_update_slice_sampler(y, group, - out_S.col(iter), out_M.col(iter), - clusterD_long, - xi, - out_alpha(iter), out_beta(iter), - maxK, maxL) ; - - arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; - pi.col(iter+1) = tmp_pi ; - - arma::mat tmp_omega = weights_slice_sampler["new_omega"] ; - omega.slice(iter+1) = tmp_omega ; - - int maxK_new = weights_slice_sampler["new_maxK"] ; - out_maxK(iter+1) = maxK_new ; - if(out_maxK(iter + 1) >= maxK) { - warningK = true ; - iters_maxK(countK) = (iter + 1) ; - countK++ ; - out_maxK(iter + 1) = maxK-1 ; } - - int maxL_new = weights_slice_sampler["new_maxL"] ; - out_maxL(iter+1) = maxL_new ; - if(out_maxL(iter + 1) >= maxL) { - warningL = true ; - iters_maxL(countL) = (iter + 1) ; - countL++ ; - out_maxL(iter + 1) = maxL-1 ; } - - arma::mat obs_beta_rv = weights_slice_sampler["obs_beta_rv"] ; - arma::vec u_O = weights_slice_sampler["u_O"] ; - arma::vec u_D = weights_slice_sampler["u_D"] ; - - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE CLUSTER ASSIGNMENT - */ - - /* update distributional clusters S */ - out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, - out_M.col(iter), - pi.col(iter+1), omega.slice(iter+1), - u_D, xi, - out_maxK(iter+1)) ; - out_S.col(iter+1) = out_S.col(iter+1) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), iter+1 ) ; } - - /* update observational clusters M */ - out_M.col(iter+1) = slicedDP_sample_obs_cluster(y, - clusterD_long, - omega.slice(iter+1), - u_O, xi, - out_maxL(iter+1), - mu.col(iter), sigma2.col(iter)) ; - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE PARAMETERS - */ - out_params = sample_model_parameters(y, out_M.col(iter+1), - maxL, - m0, tau0, lambda0, gamma0) ; - - arma::vec tmp_mu = out_params["out_mu"] ; - mu.col(iter+1) = tmp_mu ; - - arma::vec tmp_sigma2 = out_params["out_sigma2"] ; - sigma2.col(iter+1) = tmp_sigma2 ; - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE DIRICHLET HYPER-PARAMETERS - */ - - /* sample alpha */ - if(!fixed_alpha) { - out_alpha(iter+1) = sample_alpha(out_alpha(iter), - hyp_alpha1, hyp_alpha2, - G, out_S.col(iter+1)) ; - } - - /* sample beta */ - if(!fixed_beta) { - out_beta(iter+1) = sample_beta(hyp_beta1, hyp_beta2, - out_S.col(iter+1), out_M.col(iter+1), - clusterD_long, - obs_beta_rv) ; - } - - /*---------------------------------------------*/ - - // END - } - - Rcpp::List warnings ; - if(warningK & !(warningL)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); - } - if(warningL & !(warningK)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - if(warningL & warningK) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), - Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - - return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), - Rcpp::Named("sigma2") = sigma2.t(), - Rcpp::Named("obs_cluster") = out_M.t(), - Rcpp::Named("distr_cluster") = out_S.t(), - Rcpp::Named("pi") = pi.t(), - Rcpp::Named("omega") = omega, - Rcpp::Named("alpha") = out_alpha, - Rcpp::Named("beta") = out_beta, - Rcpp::Named("maxK") = out_maxK, - Rcpp::Named("maxL") = out_maxL, - Rcpp::Named("warnings") = warnings); -} diff --git a/src/main_cam_burn.cpp b/src/main_cam_burn.cpp new file mode 100644 index 0000000..189cf2b --- /dev/null +++ b/src/main_cam_burn.cpp @@ -0,0 +1,272 @@ +#include +#include "funs_cam.h" +// [[Rcpp::depends(RcppProgress)]] +#include +#include + +// [[Rcpp::export]] +Rcpp::List sample_cam_burn(int nrep, // number of replications of the Gibbs sampler + int burn, + const arma::vec & y, // input data + const arma::vec & group, // group assignment for each observation in the vector y + int maxK, // maximum number of distributional clusters - for memory allocation + int maxL, // maximum number of observational clusters - for memory allocation + double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) + double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) + bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? + double alpha, double beta, // if alpha or/and beta fixed + double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) - if alpha not fixed + double hyp_beta1, double hyp_beta2,// hyperparameters of beta ( par of the observational DP ) - if beta not fixed + arma::vec mu_start, + arma::vec sigma2_start, + arma::vec M_start, + arma::vec S_start, + double alpha_start, + double beta_start, + bool progressbar +) +{ + int N = y.n_elem ; + arma::vec unique_groups = arma::unique(group) ; + int G = unique_groups.n_elem ; + + //Rcpp::Rcout << nrep-burn; + + // allocate output matrices + arma::mat mu(maxL, nrep - burn, arma::fill::zeros) ; + arma::mat sigma2(maxL, nrep - burn, arma::fill::ones) ; + arma::mat out_S(G, nrep - burn) ; // distributional clusters + arma::mat out_M(N, nrep - burn) ; // observational clusters + arma::mat out_pi(maxK, nrep - burn, arma::fill::zeros) ; + arma::cube out_omega(maxL, maxK, nrep - burn, arma::fill::zeros) ; + arma::vec out_alpha(nrep - burn) ; // DP parameter for the distributional weights + arma::vec out_beta(nrep - burn) ; // DP parameter for the observational weights + arma::vec out_maxK(nrep - burn) ; + arma::vec out_maxL(nrep - burn) ; + + // initialization + arma::vec current_mu = mu_start ; + arma::vec current_sigma2 = sigma2_start ; + arma::vec current_M = M_start; + arma::vec current_S = S_start; + arma::vec current_pi(maxK) ; + current_pi.fill(1.0/(maxK)); + arma::mat current_omega(maxL,maxK) ; current_omega.fill(1.0/(maxL)); + int current_maxK = maxK ; + int current_maxL = maxL ; + /* + mu.col(0) = mu_start ; + sigma2.col(0) = sigma2_start ; + out_M.col(0) = M_start ; + out_S.col(0) = S_start ; + pi.col(0).fill(1/(maxK*1.0)) ; + omega.slice(0).fill(1/(maxL*1.0)) ; + */ + + double current_beta = 0; + double current_alpha = 0; + + if(fixed_alpha) { + out_alpha.fill(alpha) ; + current_alpha = alpha_start ; + } else { + current_alpha = alpha_start ; + } + + if(fixed_beta) { + out_beta.fill(beta) ; + current_beta = beta_start; + } else { + current_beta = beta_start ; + } + + + + // auxiliary quantities + arma::vec clusterD_long(N) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = S_start( group(i) ) ; } + Rcpp::List out_params ; + + // slice variables + Rcpp::List weights_slice_sampler ; + + arma::vec xi( std::max(maxL, maxK) ) ; + for(int l = 0; l < std::max(maxL, maxK); l++) { xi(l) = fun_xi(0.5, l+1) ; } + + bool warningL = false ; arma::vec iters_maxL(nrep) ; int countL = 0 ; + bool warningK = false ; arma::vec iters_maxK(nrep) ; int countK = 0 ; + + // progress bar + bool display_progress = progressbar ; + Progress p(nrep, display_progress) ; + + + /* START */ + for(int iter = 0; iter < nrep ; iter++) + { + if( Progress::check_abort() ) { return -1.0 ; } + p.increment(); + + + /*---------------------------------------------*/ + /* + * UPDATE MIXTURE WEIGHTS + */ + + weights_slice_sampler = weights_update_slice_sampler(y, group, + current_S, current_M, + clusterD_long, + xi, + current_alpha, + current_beta, + maxK, maxL) ; + + arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; + current_pi = tmp_pi ; + + arma::mat tmp_omega = weights_slice_sampler["new_omega"] ; + current_omega = tmp_omega ; + + int maxK_new = weights_slice_sampler["new_maxK"] ; + current_maxK = maxK_new ; + if(current_maxK >= maxK) { + warningK = true ; + iters_maxK(countK) = (iter + 1) ; + countK++ ; + current_maxK = maxK-1 ; } + + int maxL_new = weights_slice_sampler["new_maxL"] ; + current_maxL = maxL_new ; + if(current_maxL >= maxL) { + warningL = true ; + iters_maxL(countL) = (iter + 1) ; + countL++ ; + current_maxL = maxL-1 ; } + + arma::mat obs_beta_rv = weights_slice_sampler["obs_beta_rv"] ; + arma::vec u_O = weights_slice_sampler["u_O"] ; + arma::vec u_D = weights_slice_sampler["u_D"] ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE CLUSTER ASSIGNMENT + */ + + /* update distributional clusters S */ + current_S = slicedDP_sample_distr_cluster(group, + current_M, + current_pi, + current_omega, + u_D, xi, + current_maxK) ; + //out_S.col(iter+1) = out_S.col(iter+1) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i)) ; } + + /* update observational clusters M */ + current_M = slicedDP_sample_obs_cluster(y, + clusterD_long, + current_omega, + u_O, xi, + current_maxL, + current_mu, + current_sigma2) ; + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE PARAMETERS + */ + out_params = sample_model_parameters(y, current_M, + maxL, + m0, tau0, lambda0, gamma0) ; + + arma::vec tmp_mu = out_params["out_mu"] ; + current_mu = tmp_mu ; + + arma::vec tmp_sigma2 = out_params["out_sigma2"] ; + current_sigma2 = tmp_sigma2 ; + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE DIRICHLET HYPER-PARAMETERS + */ + + /* sample alpha */ + if(!fixed_alpha) { + current_alpha = sample_alpha(current_alpha, + hyp_alpha1, hyp_alpha2, + G, current_S) ; + } + + /* sample beta */ + if(!fixed_beta) { + current_beta = sample_beta(hyp_beta1, hyp_beta2, + current_S, + current_M, + clusterD_long, + obs_beta_rv) ; + } + + /*---------------------------------------------*/ + + // saving + + if(iter >= burn){ + int ind = iter-burn; + //Rcpp::Rcout << ind << "\n"; + + mu.col(ind) = current_mu; + sigma2.col(ind) = current_sigma2; + //Rcpp::Rcout << "a"; + out_maxL(ind) = current_maxL; + out_maxK(ind) = current_maxK; + //Rcpp::Rcout << "b"; + + out_alpha(ind) = current_alpha; + out_beta(ind) = current_beta; + //Rcpp::Rcout << "c"; + + out_pi.col(ind) = current_pi; + out_omega.slice(ind) = current_omega; + // Rcpp::Rcout << "d"; + + out_S.col(ind) = current_S; + out_M.col(ind) = current_M; + // Rcpp::Rcout << "e"; + + } + + + } + + Rcpp::List warnings ; + if(warningK & !(warningL)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); + } + if(warningL & !(warningK)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + if(warningL & warningK) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), + Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + + return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), + Rcpp::Named("sigma2") = sigma2.t(), + Rcpp::Named("obs_cluster") = out_M.t(), + Rcpp::Named("distr_cluster") = out_S.t(), + Rcpp::Named("pi") = out_pi.t(), + Rcpp::Named("omega") = out_omega, + Rcpp::Named("alpha") = out_alpha, + Rcpp::Named("beta") = out_beta, + Rcpp::Named("maxK") = out_maxK, + Rcpp::Named("maxL") = out_maxL, + Rcpp::Named("warnings") = warnings); +} diff --git a/src/main_fcam.cpp b/src/main_fcam.cpp deleted file mode 100644 index 59f2fee..0000000 --- a/src/main_fcam.cpp +++ /dev/null @@ -1,240 +0,0 @@ -#include -#include "funs_fcam.h" -// [[Rcpp::depends(RcppProgress)]] -#include -#include - - -// [[Rcpp::export]] -Rcpp::List sample_fcam_arma(int nrep, // number of replications of the Gibbs sampler - const arma::vec & y, // input data - const arma::vec & group, // group assignment for each observation in the vector y - int maxK, // maximum number of distributional clusters - int maxL, // maximum number of observational clusters - double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) - double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) - bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? - double alpha, double beta, // Dirichlet parameters if fixed - double hyp_alpha1, double hyp_alpha2,// hyperparameter of the gamma prior for the distributional Dirichlet - double hyp_beta1, double hyp_beta2, // hyperparameter of the gamma prior for the observational Dirichlet - arma::vec mu_start, // starting point - arma::vec sigma2_start, - arma::vec M_start, - arma::vec S_start, - double alpha_start, double beta_start, - double eps_alpha, // MH step on alpha - double eps_beta, // MH step on beta - bool progressbar - ) -{ - int N = y.n_elem ; - arma::vec unique_groups = arma::unique(group) ; - int G = unique_groups.n_elem ; - - // allocate output matrices - arma::mat mu(maxL, nrep, arma::fill::zeros) ; - arma::mat sigma2(maxL, nrep, arma::fill::ones) ; - arma::mat out_S(G, nrep) ; - arma::mat out_M(N, nrep) ; - arma::mat pi(maxK, nrep, arma::fill::zeros) ; - arma::cube omega(maxL, maxK, nrep, arma::fill::zeros) ; - arma::vec out_alpha(nrep) ; // Dirichlet parameter for the distributional weights - arma::vec out_beta(nrep) ; // Dirichlet parameter for the observational weights - arma::vec out_K(nrep) ; - arma::vec out_L(nrep) ; - arma::vec out_Kplus(nrep) ; - arma::vec out_Lplus(nrep) ; - - // initialization - mu.col(0) = mu_start ; - sigma2.col(0) = sigma2_start ; - out_M.col(0) = M_start ; - out_S.col(0) = S_start ; - out_K(0) = maxK-1 ; - out_L(0) = maxL-1 ; - out_Kplus(0) = out_S.col(0).max() + 1 ; - out_Lplus(0) = out_M.col(0).max() + 1 ; - pi.col(0) = arma::ones(maxK)/maxK ; - omega.slice(0) = arma::ones(maxL, maxK) / maxL ; - - Rcpp::List out_params ; - Rcpp::List tmp_obs_cl ; - bool warningL = false ; arma::vec iters_maxL(nrep-1) ; int countL = 0 ; - bool warningK = false ; arma::vec iters_maxK(nrep-1) ; int countK = 0 ; - - if(fixed_alpha) { - out_alpha.fill(alpha) ; - } else { - out_alpha(0) = alpha_start ; - } - - if(fixed_beta) { - out_beta.fill(beta) ; - } else { - out_beta(0) = beta_start ; - } - - arma::vec clusterD_long(N) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), 0 ) ; } - - // progress bar - bool display_progress = progressbar ; - Progress p(nrep, display_progress) ; - - // START - for(int iter = 0; iter < nrep -1 ; iter++) - { - if( Progress::check_abort() ) { return -1.0 ; } - p.increment(); - - /*---------------------------------------------*/ - /* - * UPDATE CLUSTER ASSIGNMENT - */ - /* update distributional clusters S */ - out_S.col(iter+1) = fcam_sample_distr_cluster(y, group, - pi.col(iter), omega.slice(iter), - out_M.col(iter), - out_K(iter)) ; - - out_Kplus(iter+1) = out_S.col(iter+1).max() + 1 ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), iter+1 ) ; } - - /* update observational clusters M */ - out_M.col(iter+1) = fcam_sample_obs_cluster(y, - clusterD_long, - omega.slice(iter), - out_K(iter), out_L(iter), - mu.col(iter), sigma2.col(iter)) ; - - // out_M.col(iter+1) = out_M.col(iter) ; - out_Lplus(iter+1) = out_M.col(iter+1).max() + 1; - - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE PARAMETERS - */ - out_params = sample_model_parameters(y, out_M.col(iter+1), - maxL, - m0, tau0, lambda0, gamma0) ; - - arma::vec tmp_mu = out_params["out_mu"] ; - mu.col(iter+1) = tmp_mu ; - - arma::vec tmp_sigma2 = out_params["out_sigma2"] ; - sigma2.col(iter+1) = tmp_sigma2 ; - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE THE NUMBER OF COMPONENTS - */ - - /* update distributional components out_K */ - out_K(iter + 1) = fcam_sample_K(maxK, - out_Kplus(iter+1), - 1, 4, 3, - out_alpha(iter), - out_S.col(iter + 1)) ; - if(out_K(iter + 1) >= maxK) { - warningK = true ; - iters_maxK(countK) = (iter + 1) ; - countK++ ; - out_K(iter + 1) = maxK-1 ; } - - /* update observational clusters out_L */ - out_L(iter + 1) = fcam_sample_K(maxL, - out_Lplus(iter+1), - 1, 4, 3, - out_beta(iter), - out_M.col(iter + 1)) ; - if(out_L(iter + 1) >= maxL) { - warningL = true ; - iters_maxL(countL) = (iter + 1) ; - countL++ ; - out_L(iter + 1) = maxL-1 ; } - - mu(arma::span(out_L(iter+1), maxL-1), arma::span(iter+1)) = arma::zeros(maxL - out_L(iter+1)) ; - sigma2(arma::span(out_L(iter+1), maxL-1), iter+1) = arma::zeros(maxL - out_L(iter+1)) ; - - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE DIRICHLET HYPER-PARAMETERS - */ - /* sample alpha */ - if(!fixed_alpha) { - out_alpha(iter+1) = fcam_MH_alpha(out_alpha(iter), eps_alpha, - hyp_alpha1, hyp_alpha2, - out_Kplus(iter+1), out_K(iter+1), - out_S.col(iter+1)) ; - } - - /* sample beta */ - if(!fixed_beta) { - out_beta(iter+1) = fcam_MH_alpha(out_beta(iter), eps_beta, - hyp_beta1, hyp_beta2, - out_Lplus(iter+1), out_L(iter+1), - out_M.col(iter+1)) ; - } - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE MIXTURE WEIGHTS - */ - pi.col(iter+1) = dirichlet_sample_distr_weights(out_S.col(iter+1), - out_alpha(iter+1), - out_K(iter+1), maxK) ; - - omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter+1), - clusterD_long, - out_beta(iter+1), - out_K(iter+1)+1, out_L(iter+1), - maxK, maxL) ; - - - // pi.col(iter+1) = pi.col(iter) ; - // omega.slice(iter+1) = omega.slice(iter) ; - /*---------------------------------------------*/ - - - - - // END - } - - Rcpp::List warnings ; - if(warningK & !(warningL)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); - } - if(warningL & !(warningK)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - if(warningL & warningK) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), - Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - - return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), - Rcpp::Named("sigma2") = sigma2.t(), - Rcpp::Named("obs_cluster") = out_M.t(), - Rcpp::Named("distr_cluster") = out_S.t(), - Rcpp::Named("pi") = pi.t(), - Rcpp::Named("omega") = omega, - Rcpp::Named("alpha") = out_alpha, - Rcpp::Named("beta") = out_beta, - Rcpp::Named("K_plus") = out_Kplus, - Rcpp::Named("L_plus") = out_Lplus, - Rcpp::Named("K") = out_K, - Rcpp::Named("L") = out_L, - Rcpp::Named("warnings") = warnings); -} diff --git a/src/main_fcam_burn.cpp b/src/main_fcam_burn.cpp new file mode 100644 index 0000000..15c9d9e --- /dev/null +++ b/src/main_fcam_burn.cpp @@ -0,0 +1,277 @@ +#include +#include "funs_fcam.h" +// [[Rcpp::depends(RcppProgress)]] +#include +#include + + +// [[Rcpp::export]] +Rcpp::List sample_fcam_burn(int nrep, // number of replications of the Gibbs sampler + int burn, + const arma::vec & y, // input data + const arma::vec & group, // group assignment for each observation in the vector y + int maxK, // maximum number of distributional clusters + int maxL, // maximum number of observational clusters + double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) + double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) + bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? + double alpha, double beta, // Dirichlet parameters if fixed + double hyp_alpha1, double hyp_alpha2,// hyperparameter of the gamma prior for the distributional Dirichlet + double hyp_beta1, double hyp_beta2, // hyperparameter of the gamma prior for the observational Dirichlet + arma::vec mu_start, // starting point + arma::vec sigma2_start, + arma::vec M_start, + arma::vec S_start, + double alpha_start, double beta_start, + double eps_alpha, // MH step on alpha + double eps_beta, // MH step on beta + bool progressbar +) +{ + int N = y.n_elem ; + arma::vec unique_groups = arma::unique(group) ; + int G = unique_groups.n_elem ; + + // allocate output matrices + arma::mat mu(maxL, nrep - burn, arma::fill::zeros) ; + arma::mat sigma2(maxL, nrep - burn, arma::fill::ones) ; + arma::mat out_S(G, nrep - burn) ; // distributional clusters + arma::mat out_M(N, nrep - burn) ; // observational clusters + arma::mat out_pi(maxK, nrep - burn, arma::fill::zeros) ; + arma::cube out_omega(maxL, maxK, nrep - burn, arma::fill::zeros) ; + arma::vec out_alpha(nrep - burn) ; // DP parameter for the distributional weights + arma::vec out_beta(nrep - burn) ; // DP parameter for the observational weights + arma::vec out_K(nrep - burn) ; + arma::vec out_L(nrep - burn) ; + arma::vec out_Kplus(nrep-burn) ; + arma::vec out_Lplus(nrep-burn) ; + + // initialization + arma::vec current_mu = mu_start ; + arma::vec current_sigma2 = sigma2_start ; + arma::vec current_M = M_start ; + arma::vec current_S = S_start ; + int current_K = maxK-1 ; + int current_L = maxL-1 ; + int current_Kplus = current_S.max() + 1 ; + int current_Lplus = current_M.max() + 1 ; + arma::vec current_pi = arma::ones(maxK)/maxK ; + arma::mat current_omega = arma::ones(maxL, maxK) / maxL ; + + Rcpp::List out_params ; + Rcpp::List tmp_obs_cl ; + bool warningL = false ; arma::vec iters_maxL(nrep) ; int countL = 0 ; + bool warningK = false ; arma::vec iters_maxK(nrep) ; int countK = 0 ; + + double current_beta = 0; + double current_alpha = 0; + + if(fixed_alpha) { + out_alpha.fill(alpha) ; + current_alpha = alpha_start ; + } else { + current_alpha = alpha_start ; + } + + if(fixed_beta) { + out_beta.fill(beta) ; + current_beta = beta_start; + } else { + current_beta = beta_start ; + } + + + + // auxiliary quantities + arma::vec clusterD_long(N) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = S_start( group(i) ) ; } + + // progress bar + bool display_progress = progressbar ; + Progress p(nrep, display_progress) ; + + // START + for(int iter = 0; iter < nrep ; iter++) + { + if( Progress::check_abort() ) { return -1.0 ; } + p.increment(); + + /*---------------------------------------------*/ + /* + * UPDATE CLUSTER ASSIGNMENT + */ + /* update distributional clusters S */ + current_S = fcam_sample_distr_cluster(y, group, + current_pi, + current_omega, + current_M, + current_K) ; + + current_Kplus = current_S.max() + 1 ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i) ) ; } + + /* update observational clusters M */ + current_M = fcam_sample_obs_cluster(y, + clusterD_long, + current_omega, + current_K, + current_L, + current_mu, + current_sigma2) ; + + // out_M.col(iter+1) = out_M.col(iter) ; + current_Lplus = current_M.max() + 1; + + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE PARAMETERS + */ + out_params = sample_model_parameters(y, current_M, + maxL, + m0, tau0, lambda0, gamma0) ; + + arma::vec tmp_mu = out_params["out_mu"] ; + current_mu = tmp_mu ; + + arma::vec tmp_sigma2 = out_params["out_sigma2"] ; + current_sigma2 = tmp_sigma2 ; + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE THE NUMBER OF COMPONENTS + */ + + /* update distributional components out_K */ + current_K = fcam_sample_K(maxK, + current_Kplus, + 1, 4, 3, + current_alpha, + current_S) ; + if(current_K >= maxK) { + warningK = true ; + iters_maxK(countK) = (iter + 1) ; + countK++ ; + current_K = maxK-1 ; } + + /* update observational clusters out_L */ + current_L = fcam_sample_K(maxL, + current_Lplus, + 1, 4, 3, + current_beta, + current_M) ; + if(current_L >= maxL) { + warningL = true ; + iters_maxL(countL) = (iter + 1) ; + countL++ ; + current_L = maxL-1 ; } + + current_mu(arma::span(current_L, maxL-1)) = arma::zeros(maxL - current_L) ; + current_sigma2(arma::span(current_L, maxL-1)) = arma::zeros(maxL -current_L) ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE DIRICHLET HYPER-PARAMETERS + */ + /* sample alpha */ + if(!fixed_alpha) { + current_alpha = fcam_MH_alpha(current_alpha, eps_alpha, + hyp_alpha1, hyp_alpha2, + current_Kplus, current_K, + current_S) ; + } + + /* sample beta */ + if(!fixed_beta) { + current_beta = fcam_MH_alpha(current_beta, eps_beta, + hyp_beta1, hyp_beta2, + current_Lplus, current_L, + current_M) ; + } + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE MIXTURE WEIGHTS + */ + current_pi = dirichlet_sample_distr_weights(current_S, + current_alpha, + current_K, maxK) ; + + current_omega = dirichlet_sample_obs_weights(current_M, + clusterD_long, + current_beta, + current_K+1, current_L, + maxK, maxL) ; + + + // pi.col(iter+1) = pi.col(iter) ; + // omega.slice(iter+1) = omega.slice(iter) ; + /*---------------------------------------------*/ + + if(iter >= burn){ + int ind = iter-burn; + //Rcpp::Rcout << ind << "\n"; + + mu.col(ind) = current_mu; + sigma2.col(ind) = current_sigma2; + //Rcpp::Rcout << "a"; + + out_alpha(ind) = current_alpha; + out_beta(ind) = current_beta; + //Rcpp::Rcout << "c"; + + out_pi.col(ind) = current_pi; + out_omega.slice(ind) = current_omega; + // Rcpp::Rcout << "d"; + + out_S.col(ind) = current_S; + out_M.col(ind) = current_M; + // Rcpp::Rcout << "e"; + + out_Kplus(ind) = current_Lplus; + out_Lplus(ind) = current_Kplus; + out_L(ind) = current_L; + out_K(ind) = current_K; + } + + + + // END + } + + Rcpp::List warnings ; + if(warningK & !(warningL)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); + } + if(warningL & !(warningK)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + if(warningL & warningK) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), + Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + + return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), + Rcpp::Named("sigma2") = sigma2.t(), + Rcpp::Named("obs_cluster") = out_M.t(), + Rcpp::Named("distr_cluster") = out_S.t(), + Rcpp::Named("pi") = out_pi.t(), + Rcpp::Named("omega") = out_omega, + Rcpp::Named("alpha") = out_alpha, + Rcpp::Named("beta") = out_beta, + Rcpp::Named("K_plus") = out_Kplus, + Rcpp::Named("L_plus") = out_Lplus, + Rcpp::Named("K") = out_K, + Rcpp::Named("L") = out_L, + Rcpp::Named("warnings") = warnings); +} diff --git a/src/main_ficam.cpp b/src/main_ficam.cpp deleted file mode 100644 index e59e597..0000000 --- a/src/main_ficam.cpp +++ /dev/null @@ -1,245 +0,0 @@ -#include -#include "funs_ficam.h" -// [[Rcpp::depends(RcppProgress)]] -#include -#include - - - -// [[Rcpp::export]] -Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sampler - const arma::vec & y, // input data - const arma::vec & group, // group assignment for each observation in the vector y - int maxK, // maximum number of distributional clusters - int maxL, // maximum number of observational clusters - double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) - double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) - bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? - double alpha, double beta, // Dirichlet parameters if fixed - double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) - double hyp_beta1, double hyp_beta2, // hyperparameter of the gamma prior for the observational Dirichlet - arma::vec mu_start, // starting point - arma::vec sigma2_start, - arma::vec M_start, - arma::vec S_start, - double alpha_start, double beta_start, - double eps_beta, // MH step on beta - bool progressbar - ) -{ - int N = y.n_elem ; - arma::vec unique_groups = arma::unique(group) ; - int G = unique_groups.n_elem ; - - // allocate output matrices - arma::mat mu(maxL, nrep, arma::fill::zeros) ; - arma::mat sigma2(maxL, nrep, arma::fill::ones) ; - arma::mat out_S(G, nrep) ; - arma::mat out_M(N, nrep) ; - arma::mat pi(maxK, nrep, arma::fill::zeros) ; - arma::cube omega(maxL, maxK, nrep, arma::fill::zeros) ; - arma::vec out_alpha(nrep) ; // DP parameter for the distributional weights - arma::vec out_beta(nrep) ; // Dirichlet parameter for the observational weights - arma::vec out_maxK(nrep) ; - arma::vec out_L(nrep) ; - arma::vec out_Lplus(nrep) ; - - // initialization - mu.col(0) = mu_start ; - sigma2.col(0) = sigma2_start ; - out_M.col(0) = M_start ; - out_S.col(0) = S_start ; - out_maxK(0) = maxK; // checked: removed - 1 - out_L(0) = maxL; // checked: removed - 1 - /// - out_Lplus(0) = out_M.col(0).max() + 1 ; - pi.col(0) = arma::ones(maxK)/maxK ; - omega.slice(0) = arma::ones(maxL, maxK) / maxL ; - - Rcpp::List tmp_obs_cl ; - Rcpp::List out_params ; - Rcpp::List weights_slice_sampler ; - //arma::vec u_D(G, arma::fill::zeros) ; - - if(fixed_alpha) { - out_alpha.fill(alpha) ; - } else { - out_alpha(0) = alpha_start ; - } - - if(fixed_beta) { - out_beta.fill(beta) ; - } else { - out_beta(0) = beta_start ; - } - - arma::vec clusterD_long(N) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), 0 ) ; } - arma::vec xi(maxK) ; - for(int k = 0; k < maxK; k++) { xi(k) = fun_xi(0.5, k+1) ; } - - bool warningL = false ; arma::vec iters_maxL(nrep-1) ; int countL = 0 ; - bool warningK = false ; arma::vec iters_maxK(nrep-1) ; int countK = 0 ; - - // progress bar - bool display_progress = progressbar ; - Progress p(nrep, display_progress) ; - - // START - for(int iter = 0; iter < nrep -1 ; iter++) - { - if( Progress::check_abort() ) { return -1.0 ; } - p.increment(); - - /*---------------------------------------------*/ - /* - * UPDATE MIXTURE WEIGHTS - */ - - weights_slice_sampler = ficam_weights_update_slice_sampler(group, - out_S.col(iter), - out_alpha(iter), - xi, - maxK) ; - - arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; - pi.col(iter+1) = tmp_pi ; - - int maxK_new = weights_slice_sampler["new_maxK"] ; - out_maxK(iter+1) = maxK_new ; - if(out_maxK(iter + 1) >= maxK) { - warningK = true ; - iters_maxK(countK) = (iter + 1) ; - countK++ ; - out_maxK(iter + 1) = maxK-1 ; } - - arma::vec u_D = weights_slice_sampler["u_D"] ; - - // pi.col(iter+1) = pi.col(iter) ; - // out_maxK(iter+1) = out_maxK(iter) ; - - omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter), - clusterD_long, - out_beta(iter), - out_maxK(iter+1), out_L(iter), // checked: added iter + 1 - maxK, maxL) ; - - // pi.col(iter+1) = pi.col(iter) ; - // omega.slice(iter+1) = omega.slice(iter) ; - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE CLUSTER ASSIGNMENT - */ - /* update distributional clusters S */ - /// !!!!!! attention, I changed the function here! - // out_S.col(iter+1) = slicedDP_sample_distr_cluster2(group, - out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, - out_M.col(iter), - pi.col(iter+1), omega.slice(iter+1), - u_D, xi, - out_maxK(iter+1)) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), iter+1 ) ; } - - /* update observational clusters M */ - out_M.col(iter+1) = fcam_sample_obs_cluster(y, - clusterD_long, - omega.slice(iter+1), - out_maxK(iter+1), out_L(iter), - mu.col(iter), sigma2.col(iter)) ; - - // out_M.col(iter+1) = out_M.col(iter) ; - out_Lplus(iter+1) = out_M.col(iter+1).max() + 1; - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE PARAMETERS - */ - out_params = sample_model_parameters(y, out_M.col(iter+1), - maxL, - m0, tau0, lambda0, gamma0) ; - - arma::vec tmp_mu = out_params["out_mu"] ; - mu.col(iter+1) = tmp_mu ; - - arma::vec tmp_sigma2 = out_params["out_sigma2"] ; - sigma2.col(iter+1) = tmp_sigma2 ; - - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE THE NUMBER OF COMPONENTS - */ - /* update observational clusters out_L */ - out_L(iter + 1) = fcam_sample_K(maxL -1, // checked: added - 1 - out_Lplus(iter+1), - 1, 4, 3, - out_beta(iter), - out_M.col(iter + 1)) ; - - if(out_L(iter + 1) >= maxL) { - warningL = true ; - iters_maxL(countL) = (iter + 1) ; - countL++ ; - out_L(iter + 1) = maxL-1 ; } - - mu(arma::span(out_L(iter+1), maxL-1), - arma::span(iter+1)) = // span e' necessario? - arma::zeros(maxL - out_L(iter+1)) ; - sigma2(arma::span(out_L(iter+1), maxL-1), iter+1) = arma::zeros(maxL - out_L(iter+1)) ; - - // out_L(iter + 1) = out_L(iter) ; - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE DIRICHLET HYPER-PARAMETERS - */ - /* sample alpha */ - if(!fixed_alpha) { - out_alpha(iter+1) = sample_alpha(out_alpha(iter), - hyp_alpha1, hyp_alpha2, - G, out_S.col(iter+1)) ; - } - - /* sample beta */ - if(!fixed_beta) { - out_beta(iter+1) = fcam_MH_alpha(out_beta(iter), eps_beta, - hyp_beta1, hyp_beta2, - out_Lplus(iter+1), out_L(iter+1), - out_M.col(iter+1)) ; - } - /*---------------------------------------------*/ - - - // END - } - - Rcpp::List warnings ; - if(warningK & !(warningL)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); - } - if(warningL & !(warningK)) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - if(warningL & warningK) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), - Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); - } - - return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), - Rcpp::Named("sigma2") = sigma2.t(), - Rcpp::Named("obs_cluster") = out_M.t(), - Rcpp::Named("distr_cluster") = out_S.t(), - Rcpp::Named("pi") = pi.t(), - Rcpp::Named("omega") = omega, - Rcpp::Named("alpha") = out_alpha, - Rcpp::Named("beta") = out_beta, - Rcpp::Named("maxK") = out_maxK, - Rcpp::Named("L_plus") = out_Lplus, - Rcpp::Named("L") = out_L, - Rcpp::Named("warnings") = warnings ); -} diff --git a/src/main_ficam_burn.cpp b/src/main_ficam_burn.cpp new file mode 100644 index 0000000..23adb61 --- /dev/null +++ b/src/main_ficam_burn.cpp @@ -0,0 +1,270 @@ +#include +#include "funs_ficam.h" +// [[Rcpp::depends(RcppProgress)]] +#include +#include + + + +// [[Rcpp::export]] +Rcpp::List sample_ficam_burn(int nrep, // number of replications of the Gibbs sampler + int burn, + const arma::vec & y, // input data + const arma::vec & group, // group assignment for each observation in the vector y + int maxK, // maximum number of distributional clusters + int maxL, // maximum number of observational clusters + double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) + double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) + bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? + double alpha, double beta, // Dirichlet parameters if fixed + double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) + double hyp_beta1, double hyp_beta2, // hyperparameter of the gamma prior for the observational Dirichlet + arma::vec mu_start, // starting point + arma::vec sigma2_start, + arma::vec M_start, + arma::vec S_start, + double alpha_start, double beta_start, + double eps_beta, // MH step on beta + bool progressbar +) +{ + int N = y.n_elem ; + + arma::vec unique_groups = arma::unique(group) ; + int G = unique_groups.n_elem ; + + // allocate output matrices + arma::mat mu(maxL, nrep - burn, arma::fill::zeros) ; + arma::mat sigma2(maxL, nrep - burn, arma::fill::ones) ; + arma::mat out_S(G, nrep - burn) ; // distributional clusters + arma::mat out_M(N, nrep - burn) ; // observational clusters + arma::mat out_pi(maxK, nrep - burn, arma::fill::zeros) ; + arma::cube out_omega(maxL, maxK, nrep - burn, arma::fill::zeros) ; + arma::vec out_alpha(nrep - burn) ; // DP parameter for the distributional weights + arma::vec out_beta(nrep - burn) ; // DP parameter for the observational weights + arma::vec out_maxK(nrep - burn) ; + arma::vec out_L(nrep - burn) ; + arma::vec out_Lplus(nrep-burn) ; + + // initialization + arma::vec current_mu = mu_start ; + arma::vec current_sigma2 = sigma2_start ; + arma::vec current_M = M_start ; + arma::vec current_S = S_start ; + int current_maxK = maxK ; + int current_L = maxL ; + int current_Lplus = current_M.max() + 1 ; + arma::vec current_pi = arma::ones(maxK)/maxK ; + arma::mat current_omega = arma::ones(maxL, maxK) / maxL ; + + Rcpp::List out_params ; + Rcpp::List tmp_obs_cl ; + Rcpp::List weights_slice_sampler ; + bool warningL = false ; arma::vec iters_maxL(nrep) ; int countL = 0 ; + bool warningK = false ; arma::vec iters_maxK(nrep) ; int countK = 0 ; + + //arma::vec u_D(G, arma::fill::zeros) ; + double current_beta = 0; + double current_alpha = 0; + + if(fixed_alpha) { + out_alpha.fill(alpha) ; + current_alpha = alpha_start ; + } else { + current_alpha = alpha_start ; + } + + if(fixed_beta) { + out_beta.fill(beta) ; + current_beta = beta_start; + } else { + current_beta = beta_start ; + } + + arma::vec clusterD_long(N) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i) ) ; } + arma::vec xi(maxK) ; + for(int k = 0; k < maxK; k++) { xi(k) = fun_xi(0.5, k+1) ; } + + // progress bar + bool display_progress = progressbar ; + Progress p(nrep, display_progress) ; + + // START + for(int iter = 0; iter < nrep ; iter++) + { + if( Progress::check_abort() ) { return -1.0 ; } + p.increment(); + + /*---------------------------------------------*/ + /* + * UPDATE MIXTURE WEIGHTS + */ + + weights_slice_sampler = ficam_weights_update_slice_sampler(group, + current_S, + current_alpha, + xi, + maxK) ; + + arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; + current_pi = tmp_pi ; + + int maxK_new = weights_slice_sampler["new_maxK"] ; + current_maxK = maxK_new ; + if(current_maxK >= maxK) { + warningK = true ; + iters_maxK(countK) = (iter + 1) ; + countK++ ; + current_maxK = maxK-1 ; } + + arma::vec u_D = weights_slice_sampler["u_D"] ; + + + current_omega = dirichlet_sample_obs_weights(current_M, + clusterD_long, + current_beta, + current_maxK, + current_L, // checked: added iter + 1 + maxK, maxL) ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE CLUSTER ASSIGNMENT + */ + current_S = slicedDP_sample_distr_cluster(group, + current_M, + current_pi, + current_omega, + u_D, xi, + current_maxK) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i)) ; } + + /* update observational clusters M */ + current_M = fcam_sample_obs_cluster(y, + clusterD_long, + current_omega, + current_maxK, + current_L, + current_mu, + current_sigma2) ; + + current_Lplus = current_M.max() + 1; + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE PARAMETERS + */ + out_params = sample_model_parameters(y, current_M, + maxL, + m0, tau0, lambda0, gamma0) ; + + arma::vec tmp_mu = out_params["out_mu"] ; + current_mu = tmp_mu ; + + arma::vec tmp_sigma2 = out_params["out_sigma2"] ; + current_sigma2 = tmp_sigma2 ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE THE NUMBER OF COMPONENTS + */ + /* update observational clusters out_L */ + current_L = fcam_sample_K(maxL -1, // checked: added - 1 + current_Lplus, + 1, 4, 3, + current_beta, + current_M) ; + + if(current_L >= maxL) { + warningL = true ; + iters_maxL(countL) = (iter + 1) ; + countL++ ; + current_L = maxL-1 ; } + + current_mu(arma::span(current_L, maxL-1)) = // span e' necessario? + arma::zeros(maxL - current_L) ; + current_sigma2(arma::span(current_L, maxL-1)) = arma::zeros(maxL - current_L) ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE DIRICHLET HYPER-PARAMETERS + */ + /* sample alpha */ + if(!fixed_alpha) { + current_alpha = sample_alpha(current_alpha, + hyp_alpha1, hyp_alpha2, + G, current_S) ; + } + + /* sample beta */ + if(!fixed_beta) { + current_beta = fcam_MH_alpha(current_beta, eps_beta, + hyp_beta1, hyp_beta2, + current_Lplus, current_L, + current_M) ; + } + /*---------------------------------------------*/ + + // saving + + if(iter >= burn){ + int ind = iter-burn; + //Rcpp::Rcout << ind << "\n"; + + mu.col(ind) = current_mu; + sigma2.col(ind) = current_sigma2; + //Rcpp::Rcout << "a"; + out_L(ind) = current_L; + out_Lplus(ind) = current_Lplus; + out_maxK(ind) = current_maxK; + //Rcpp::Rcout << "b"; + + out_alpha(ind) = current_alpha; + out_beta(ind) = current_beta; + //Rcpp::Rcout << "c"; + + out_pi.col(ind) = current_pi; + out_omega.slice(ind) = current_omega; + // Rcpp::Rcout << "d"; + + out_S.col(ind) = current_S; + out_M.col(ind) = current_M; + // Rcpp::Rcout << "e"; + + } + // END + } + + Rcpp::List warnings ; + if(warningK & !(warningL)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); + } + if(warningL & !(warningK)) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + if(warningL & warningK) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK), + Rcpp::Named("top_maxL") = iters_maxL.head(countL) ); + } + + return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), + Rcpp::Named("sigma2") = sigma2.t(), + Rcpp::Named("obs_cluster") = out_M.t(), + Rcpp::Named("distr_cluster") = out_S.t(), + Rcpp::Named("pi") = out_pi.t(), + Rcpp::Named("omega") = out_omega, + Rcpp::Named("alpha") = out_alpha, + Rcpp::Named("beta") = out_beta, + Rcpp::Named("maxK") = out_maxK, + Rcpp::Named("L_plus") = out_Lplus, + Rcpp::Named("L") = out_L, + Rcpp::Named("warnings") = warnings ); +} diff --git a/src/main_overcam.cpp b/src/main_overcam_burn.cpp similarity index 54% rename from src/main_overcam.cpp rename to src/main_overcam_burn.cpp index 6d6a862..de353cd 100644 --- a/src/main_overcam.cpp +++ b/src/main_overcam_burn.cpp @@ -5,7 +5,8 @@ #include // [[Rcpp::export]] -Rcpp::List sample_overcam_arma(int nrep, // number of replications of the Gibbs sampler +Rcpp::List sample_overcam_burn(int nrep, // number of replications of the Gibbs sampler + int burn, const arma::vec & y, // input data const arma::vec & group, // group assignment for each observation in the vector y int maxK, // maximum number of distributional clusters @@ -31,68 +32,83 @@ Rcpp::List sample_overcam_arma(int nrep, // number of replications of the Gibbs arma::vec unique_groups = arma::unique(group) ; int G = unique_groups.n_elem ; + // allocate output matrices - arma::mat mu(maxL, nrep, arma::fill::zeros) ; - arma::mat sigma2(maxL, nrep, arma::fill::ones) ; - arma::mat out_S(G, nrep) ; - arma::mat out_M(N, nrep) ; - arma::mat pi(maxK, nrep, arma::fill::zeros) ; - arma::cube omega(maxL, maxK, nrep, arma::fill::zeros) ; - arma::vec out_alpha(nrep) ; // Dirichlet parameter for the distributional weights - arma::vec out_beta(nrep) ; // Dirichlet parameter for the observational weights + arma::mat mu(maxL, nrep - burn, arma::fill::zeros) ; + arma::mat sigma2(maxL, nrep - burn, arma::fill::ones) ; + arma::mat out_S(G, nrep - burn) ; // distributional clusters + arma::mat out_M(N, nrep - burn) ; // observational clusters + arma::mat out_pi(maxK, nrep - burn, arma::fill::zeros) ; + arma::cube out_omega(maxL, maxK, nrep - burn, arma::fill::zeros) ; + arma::vec out_alpha(nrep - burn) ; // DP parameter for the distributional weights + arma::vec out_beta(nrep - burn) ; // DP parameter for the observational weights + arma::vec out_maxK(nrep - burn) ; + arma::vec out_maxL(nrep - burn) ; // initialization - mu.col(0) = mu_start ; - sigma2.col(0) = sigma2_start ; - out_M.col(0) = M_start ; - out_S.col(0) = S_start ; + arma::vec current_mu = mu_start ; + arma::vec current_sigma2 = sigma2_start ; + arma::vec current_M = M_start; + arma::vec current_S = S_start; + arma::vec current_pi(maxK) ; + current_pi.fill(1.0/(maxK)); + arma::mat current_omega(maxL,maxK) ; + current_omega.fill(1.0/(maxL)); + + + double current_beta = 0; + double current_alpha = 0; if(fixed_alpha) { out_alpha.fill(alpha) ; + current_alpha = alpha_start ; } else { - out_alpha(0) = alpha_start ; + current_alpha = alpha_start ; } if(fixed_beta) { out_beta.fill(beta) ; + current_beta = beta_start; } else { - out_beta(0) = beta_start ; + current_beta = beta_start ; } - // other auxiliary quantities - Rcpp::List out_params ; + + + // auxiliary quantities arma::vec clusterD_long(N) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), 0 ) ; } + for(int i = 0; i < N; i++) { clusterD_long(i) = S_start( group(i) ) ; } + Rcpp::List out_params ; - arma::vec iters_maxL(nrep-1); - arma::vec iters_maxK(nrep-1); + arma::vec iters_maxL(nrep-burn); + arma::vec iters_maxK(nrep-burn); // progress bar bool display_progress = progressbar ; Progress p(nrep, display_progress) ; // START - for(int iter = 0; iter < nrep -1 ; iter++) + for(int iter = 0; iter < nrep ; iter++) { if( Progress::check_abort() ) { return -1.0 ; } p.increment(); - + /*---------------------------------------------*/ /* * UPDATE MIXTURE WEIGHTS */ /* sample distributional probabilities pi */ - pi.col(iter+1) = dirichlet_sample_distr_weights(out_S.col(iter), - out_alpha(iter)*maxK, - maxK, - maxK) ; - + current_pi = dirichlet_sample_distr_weights(current_S, + current_alpha*maxK, + maxK, + maxK) ; + /* for each distributional cluster k, sample observational probabilities omega */ - omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter), - clusterD_long, - out_beta(iter)*maxL, - maxK, maxL, - maxK, maxL) ; + current_omega = dirichlet_sample_obs_weights(current_M, + clusterD_long, + current_beta*maxL, + maxK, maxL, + maxK, maxL) ; /*---------------------------------------------*/ @@ -100,21 +116,22 @@ Rcpp::List sample_overcam_arma(int nrep, // number of replications of the Gibbs /* * UPDATE CLUSTER ASSIGNMENT */ - + /* update distributional clusters S */ - out_S.col(iter+1) = sample_distr_cluster(y, group, - pi.col(iter+1), omega.slice(iter+1), - maxK, maxL, - mu.col(iter), sigma2.col(iter)) ; - - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), iter+1 ) ; } + current_S = sample_distr_cluster(y, group, + current_pi, current_omega, + maxK, maxL, + current_mu, current_sigma2) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i) ) ; } + /* update observational clusters M */ - out_M.col(iter+1) = sample_obs_cluster(y, - clusterD_long, - omega.slice(iter+1), - maxK, maxL, - mu.col(iter), sigma2.col(iter)) ; + current_M = sample_obs_cluster(y, + clusterD_long, + current_omega, + maxK, maxL, + current_mu, + current_sigma2) ; /*---------------------------------------------*/ @@ -123,15 +140,15 @@ Rcpp::List sample_overcam_arma(int nrep, // number of replications of the Gibbs /* * UPDATE PARAMETERS */ - out_params = sample_model_parameters(y, out_M.col(iter+1), + out_params = sample_model_parameters(y, current_M, maxL, m0, tau0, lambda0, gamma0) ; arma::vec tmp_mu = out_params["out_mu"] ; - mu.col(iter+1) = tmp_mu ; + current_mu = tmp_mu ; arma::vec tmp_sigma2 = out_params["out_sigma2"] ; - sigma2.col(iter+1) = tmp_sigma2 ; + current_sigma2 = tmp_sigma2 ; /*---------------------------------------------*/ @@ -142,25 +159,52 @@ Rcpp::List sample_overcam_arma(int nrep, // number of replications of the Gibbs */ /* sample alpha */ if(!fixed_alpha) { - out_alpha(iter+1) = overcam_MH_alpha(out_alpha(iter), eps_alpha, pi.col(iter+1), hyp_alpha) ; + current_alpha = overcam_MH_alpha(current_alpha, + eps_alpha, + current_pi, hyp_alpha) ; } /* sample beta */ if(!fixed_beta) { - out_beta(iter+1) = overcam_MH_beta(out_beta(iter), eps_beta, omega.slice(iter+1), hyp_beta) ; + current_beta = overcam_MH_beta(current_beta, eps_beta, + current_omega, hyp_beta) ; } /*---------------------------------------------*/ + // saving + + if(iter >= burn){ + int ind = iter-burn; + //Rcpp::Rcout << ind << "\n"; + + mu.col(ind) = current_mu; + sigma2.col(ind) = current_sigma2; + //Rcpp::Rcout << "a"; + + out_alpha(ind) = current_alpha; + out_beta(ind) = current_beta; + //Rcpp::Rcout << "c"; + + out_pi.col(ind) = current_pi; + out_omega.slice(ind) = current_omega; + // Rcpp::Rcout << "d"; + + out_S.col(ind) = current_S; + out_M.col(ind) = current_M; + // Rcpp::Rcout << "e"; + + } + // END } - return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), Rcpp::Named("sigma2") = sigma2.t(), Rcpp::Named("obs_cluster") = out_M.t(), Rcpp::Named("distr_cluster") = out_S.t(), - Rcpp::Named("pi") = pi.t(), - Rcpp::Named("omega") = omega, + Rcpp::Named("pi") = out_pi.t(), + Rcpp::Named("omega") = out_omega, Rcpp::Named("alpha") = out_alpha, Rcpp::Named("beta") = out_beta); + } diff --git a/src/main_overficam.cpp b/src/main_overficam.cpp deleted file mode 100644 index f16bbdb..0000000 --- a/src/main_overficam.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include -#include "funs_overcam.h" -#include "funs_cam.h" -#include "funs_fcam.h" -#include "funs_ficam.h" -#include "funs_san.h" - -// [[Rcpp::depends(RcppProgress)]] -#include -#include - -// [[Rcpp::export]] -Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibbs sampler - const arma::vec & y, // input data - const arma::vec & group, // group assignment for each observation in the vector y - int maxK, // maximum number of distributional clusters - int maxL, // maximum number of observational clusters - double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) - double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) - bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? - double alpha, double beta, // Dirichlet parameters if fixed - double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) - double hyp_beta, // hyperparameter of the Gamma prior for the observational Dirichlet - arma::vec mu_start, // starting point - arma::vec sigma2_start, - arma::vec M_start, - arma::vec S_start, - double alpha_start, double beta_start, - double eps_beta, // MH step on beta - bool progressbar - ) -{ - int N = y.n_elem ; - arma::vec unique_groups = arma::unique(group) ; - int G = unique_groups.n_elem ; - - // allocate output matrices - arma::mat mu(maxL, nrep, arma::fill::zeros) ; - arma::mat sigma2(maxL, nrep, arma::fill::ones) ; - arma::mat out_S(G, nrep) ; - arma::mat out_M(N, nrep) ; - arma::mat pi(maxK, nrep, arma::fill::zeros) ; - arma::cube omega(maxL, maxK, nrep, arma::fill::zeros) ; - arma::vec out_alpha(nrep) ; // DP parameter for the distributional weights - arma::vec out_beta(nrep) ; // Dirichlet parameter for the observational weights - arma::vec out_maxK(nrep) ; - - // initialization - mu.col(0) = mu_start ; - sigma2.col(0) = sigma2_start ; - out_M.col(0) = M_start ; - out_S.col(0) = S_start ; - out_maxK(0) = maxK ; // qui -1 missing? - pi.col(0).fill(1/(maxK*1.0)) ; - omega.slice(0).fill(1/(maxL*1.0)) ; - - Rcpp::List out_params ; - Rcpp::List tmp_obs_cl ; - Rcpp::List weights_slice_sampler ; - - if(fixed_alpha) { - out_alpha.fill(alpha) ; - } else { - out_alpha(0) = alpha_start ; - } - - if(fixed_beta) { - out_beta.fill(beta) ; - } else { - out_beta(0) = beta_start ; - } - - arma::vec clusterD_long(N) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), 0 ) ; } - arma::vec xi(maxK) ; - for(int k = 0; k < maxK; k++) { xi(k) = fun_xi(0.5, k+1) ; } - - bool warningK = false ; arma::vec iters_maxK(nrep-1) ; int countK = 0 ; - - // progress bar - bool display_progress = progressbar ; - Progress p(nrep, display_progress) ; - - // START - for(int iter = 0; iter < nrep -1 ; iter++) - { - if( Progress::check_abort() ) { return -1.0 ; } - p.increment(); - - /*---------------------------------------------*/ - /* - * UPDATE MIXTURE WEIGHTS - */ - - weights_slice_sampler = ficam_weights_update_slice_sampler(group, - out_S.col(iter), - out_alpha(iter), - xi, - maxK) ; - - arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; - pi.col(iter+1) = tmp_pi ; - - int maxK_new = weights_slice_sampler["new_maxK"] ; - out_maxK(iter+1) = maxK_new ; - if(out_maxK(iter + 1) >= maxK) { - warningK = true ; - iters_maxK(countK) = (iter + 1) ; - countK++ ; - out_maxK(iter + 1) = maxK-1 ; } - - arma::vec u_D = weights_slice_sampler["u_D"] ; - - omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter), - clusterD_long, - out_beta(iter)*maxL, - out_maxK(iter+1), maxL, // checked, added +1 - maxK, maxL) ; - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE CLUSTER ASSIGNMENT - */ - /* update distributional clusters S */ - out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, - out_M.col(iter), - pi.col(iter+1), omega.slice(iter+1), - u_D, xi, - out_maxK(iter+1)) ; - for(int i = 0; i < N; i++) { clusterD_long(i) = out_S( group(i), iter+1 ) ; } - - /* update observational clusters M */ - out_M.col(iter+1) = sample_obs_cluster(y, - clusterD_long, - omega.slice(iter+1), - out_maxK(iter+1), maxL, - mu.col(iter), sigma2.col(iter)) ; - - /*---------------------------------------------*/ - - - /*---------------------------------------------*/ - /* - * UPDATE PARAMETERS - */ - out_params = sample_model_parameters(y, out_M.col(iter+1), - maxL, - m0, tau0, lambda0, gamma0) ; - - arma::vec tmp_mu = out_params["out_mu"] ; - mu.col(iter+1) = tmp_mu ; - - arma::vec tmp_sigma2 = out_params["out_sigma2"] ; - sigma2.col(iter+1) = tmp_sigma2 ; - - /*---------------------------------------------*/ - - /*---------------------------------------------*/ - /* - * UPDATE DIRICHLET HYPER-PARAMETERS - */ - /* sample alpha */ - if(!fixed_alpha) { - out_alpha(iter+1) = sample_alpha(out_alpha(iter), - hyp_alpha1, hyp_alpha2, - G, out_S.col(iter+1)) ; - } - - /* sample beta */ - if(!fixed_beta) { - out_beta(iter+1) = overcam_MH_beta(out_beta(iter), eps_beta, omega.slice(iter+1), hyp_beta) ; - } - - /*---------------------------------------------*/ - - - // END - } - - Rcpp::List warnings ; - if(warningK) { - warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); - } - - return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), - Rcpp::Named("sigma2") = sigma2.t(), - Rcpp::Named("obs_cluster") = out_M.t(), - Rcpp::Named("distr_cluster") = out_S.t(), - Rcpp::Named("pi") = pi.t(), - Rcpp::Named("omega") = omega, - Rcpp::Named("alpha") = out_alpha, - Rcpp::Named("beta") = out_beta, - Rcpp::Named("maxK") = out_maxK, - Rcpp::Named("warnings") = warnings); -} diff --git a/src/main_overficam_burn.cpp b/src/main_overficam_burn.cpp new file mode 100644 index 0000000..757e545 --- /dev/null +++ b/src/main_overficam_burn.cpp @@ -0,0 +1,238 @@ +#include +#include "funs_overcam.h" +#include "funs_cam.h" +#include "funs_fcam.h" +#include "funs_ficam.h" +#include "funs_san.h" + +// [[Rcpp::depends(RcppProgress)]] +#include +#include + +// [[Rcpp::export]] +Rcpp::List sample_overficam_burn(int nrep, // number of replications of the Gibbs sampler + int burn, + const arma::vec & y, // input data + const arma::vec & group, // group assignment for each observation in the vector y + int maxK, // maximum number of distributional clusters + int maxL, // maximum number of observational clusters + double m0, double tau0, // hyperparameters on the N-iG prior on the mean parameter, mu|sigma2 ~ N(m0, sigma2 / tau0) + double lambda0, double gamma0, // hyperparameters on the N-iG prior on the variance parameter, 1/sigma2 ~ Gamma(lambda0, gamma0) + bool fixed_alpha, bool fixed_beta, // do you want fixed alpha or beta? + double alpha, double beta, // Dirichlet parameters if fixed + double hyp_alpha1, double hyp_alpha2, // hyperparameters of alpha ( par of the distributional DP ) + double hyp_beta, // hyperparameter of the Gamma prior for the observational Dirichlet + arma::vec mu_start, // starting point + arma::vec sigma2_start, + arma::vec M_start, + arma::vec S_start, + double alpha_start, double beta_start, + double eps_beta, // MH step on beta + bool progressbar +) +{ + int N = y.n_elem ; + arma::vec unique_groups = arma::unique(group) ; + int G = unique_groups.n_elem ; + + + // allocate output matrices + arma::mat mu(maxL, nrep - burn, arma::fill::zeros) ; + arma::mat sigma2(maxL, nrep - burn, arma::fill::ones) ; + arma::mat out_S(G, nrep - burn) ; // distributional clusters + arma::mat out_M(N, nrep - burn) ; // observational clusters + arma::mat out_pi(maxK, nrep - burn, arma::fill::zeros) ; + arma::cube out_omega(maxL, maxK, nrep - burn, arma::fill::zeros) ; + arma::vec out_alpha(nrep - burn) ; // DP parameter for the distributional weights + arma::vec out_beta(nrep - burn) ; // DP parameter for the observational weights + arma::vec out_maxK(nrep - burn) ; + + // initialization + arma::vec current_mu = mu_start ; + arma::vec current_sigma2 = sigma2_start ; + arma::vec current_M = M_start; + arma::vec current_S = S_start; + arma::vec current_pi(maxK) ; + current_pi.fill(1.0/(maxK)); + arma::mat current_omega(maxL,maxK) ; + current_omega.fill(1.0/(maxL)); + int current_maxK = maxK ; + + Rcpp::List out_params ; + Rcpp::List tmp_obs_cl ; + Rcpp::List weights_slice_sampler ; + + double current_beta = 0; + double current_alpha = 0; + + if(fixed_alpha) { + out_alpha.fill(alpha) ; + current_alpha = alpha_start ; + } else { + current_alpha = alpha_start ; + } + + if(fixed_beta) { + out_beta.fill(beta) ; + current_beta = beta_start; + } else { + current_beta = beta_start ; + } + + arma::vec clusterD_long(N) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i)) ; } + arma::vec xi(maxK) ; + for(int k = 0; k < maxK; k++) { xi(k) = fun_xi(0.5, k+1) ; } + + bool warningK = false ; + arma::vec iters_maxK(nrep) ; // here changed + int countK = 0 ; + + // progress bar + bool display_progress = progressbar ; + Progress p(nrep, display_progress) ; + + // START + for(int iter = 0; iter < nrep ; iter++) + { + if( Progress::check_abort() ) { return -1.0 ; } + p.increment(); + + /*---------------------------------------------*/ + /* + * UPDATE MIXTURE WEIGHTS + */ + + + weights_slice_sampler = ficam_weights_update_slice_sampler(group, + current_S, + current_alpha, + xi, + maxK) ; + + + arma::vec tmp_pi = weights_slice_sampler["new_pi"] ; + current_pi = tmp_pi ; + + int maxK_new = weights_slice_sampler["new_maxK"] ; + current_maxK = maxK_new ; + if(current_maxK >= maxK) { + warningK = true ; + iters_maxK(countK) = (iter + 1) ; + countK++ ; + current_maxK = maxK-1 ; } + + arma::vec u_D = weights_slice_sampler["u_D"] ; + + current_omega = dirichlet_sample_obs_weights(current_M, + clusterD_long, + current_beta*maxL, + current_maxK, maxL, // checked, added +1 + maxK, maxL) ; + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE CLUSTER ASSIGNMENT + */ + /* update distributional clusters S */ + current_S = slicedDP_sample_distr_cluster(group, + current_M, + current_pi, + current_omega, + u_D, xi, + current_maxK) ; + for(int i = 0; i < N; i++) { clusterD_long(i) = current_S( group(i)) ; } + + /* update observational clusters M */ + current_M = sample_obs_cluster(y, + clusterD_long, + current_omega, + current_maxK, maxL, + current_mu, + current_sigma2) ; + + /*---------------------------------------------*/ + + + /*---------------------------------------------*/ + /* + * UPDATE PARAMETERS + */ + out_params = sample_model_parameters(y, current_M, + maxL, + m0, tau0, lambda0, gamma0) ; + + arma::vec tmp_mu = out_params["out_mu"] ; + current_mu = tmp_mu ; + + arma::vec tmp_sigma2 = out_params["out_sigma2"] ; + current_sigma2 = tmp_sigma2 ; + + /*---------------------------------------------*/ + + /*---------------------------------------------*/ + /* + * UPDATE DIRICHLET HYPER-PARAMETERS + */ + /* sample alpha */ + if(!fixed_alpha) { + current_alpha = sample_alpha(current_alpha, + hyp_alpha1, hyp_alpha2, + G, current_S) ; + } + + /* sample beta */ + if(!fixed_beta) { + current_beta = overcam_MH_beta(current_beta, eps_beta, + current_omega, hyp_beta) ; + } + + + /*---------------------------------------------*/ + + // saving + + if(iter >= burn){ + int ind = iter-burn; + //Rcpp::Rcout << ind << "\n"; + + mu.col(ind) = current_mu; + sigma2.col(ind) = current_sigma2; + //Rcpp::Rcout << "a"; + + out_alpha(ind) = current_alpha; + out_beta(ind) = current_beta; + //Rcpp::Rcout << "c"; + + out_pi.col(ind) = current_pi; + out_omega.slice(ind) = current_omega; + // Rcpp::Rcout << "d"; + + out_S.col(ind) = current_S; + out_M.col(ind) = current_M; + // Rcpp::Rcout << "e"; + + } + + // END + } + + Rcpp::List warnings ; + if(warningK) { + warnings = Rcpp::List::create(Rcpp::Named("top_maxK") = iters_maxK.head(countK) ); + } + + return Rcpp::List::create(Rcpp::Named("mu") = mu.t(), + Rcpp::Named("sigma2") = sigma2.t(), + Rcpp::Named("obs_cluster") = out_M.t(), + Rcpp::Named("distr_cluster") = out_S.t(), + Rcpp::Named("pi") = out_pi.t(), + Rcpp::Named("omega") = out_omega, + Rcpp::Named("alpha") = out_alpha, + Rcpp::Named("beta") = out_beta, + Rcpp::Named("maxK") = out_maxK, + Rcpp::Named("warnings") = warnings); +}