From 4bdc228ac782cd51888fa7390b297a92198fde31 Mon Sep 17 00:00:00 2001 From: fradenti Date: Mon, 13 May 2024 12:59:37 +0200 Subject: [PATCH] updates --- .github/workflows/R-CMD-check.yaml | 2 +- DESCRIPTION | 2 +- NEWS.md | 8 +++- R/mcmc_fiSAN.R | 2 +- src/common_functions.cpp | 2 +- src/funs_cam.cpp | 64 ++++++++++++++++++++++++++---- src/funs_cam.h | 10 ++++- src/funs_fcam.cpp | 6 +-- src/funs_san.cpp | 2 +- src/main_cam.cpp | 2 +- src/main_ficam.cpp | 19 +++++---- src/main_overficam.cpp | 6 +-- 12 files changed, 96 insertions(+), 29 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a3ac618..74d8c97 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,7 +29,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/DESCRIPTION b/DESCRIPTION index 9db778b..73520a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SANple Type: Package Title: Fitting Shared Atoms Nested Models via Markov Chains Monte Carlo -Version: 0.1.0.9000 +Version: 0.1.1 Date: 2023-10-09 Authors@R: c( person("Laura", "D'Angelo", ,"laura.dangelo@live.com", diff --git a/NEWS.md b/NEWS.md index 028c9be..8c4e60b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,12 @@ # SANple (development version) -* ... +-futura versione 0.1.1 + +* Improve efficiency of stick-breaking computation +* Improve initialization of the algorithms +* Changed cpp for loops indexes from int to unsigned int when needed +* ha senso chiamare gli header di tutti ovunque? credo sia questo che renda il pacchetto pesantissimo +* Fixed a bug in the full conditional of the concentration parameter for the observational DP of CAM # SANple 0.1.0 diff --git a/R/mcmc_fiSAN.R b/R/mcmc_fiSAN.R index 03df4ec..d983a02 100644 --- a/R/mcmc_fiSAN.R +++ b/R/mcmc_fiSAN.R @@ -226,7 +226,7 @@ sample_fiSAN <- function(nrep, y, group, if(is.null(nclus_start)) { nclus_start <- min(c(maxL, 30))} M_start <- stats::kmeans(y, - centers <- nclus_start, + centers = nclus_start, algorithm="MacQueen", iter.max = 50)$cluster diff --git a/src/common_functions.cpp b/src/common_functions.cpp index b000983..ef38ab6 100644 --- a/src/common_functions.cpp +++ b/src/common_functions.cpp @@ -38,7 +38,7 @@ arma::vec relabel_arma(arma::vec cluster) for(int k = 0; k < n_distinct ; k++) { lab = uniquecl[k] ; arma::uvec indlab = find(cluster == lab) ; - for(int h = 0; h < indlab.n_elem; h++) { newlabels[indlab[h]] = k ; } + for(unsigned int h = 0; h < indlab.n_elem; h++) { newlabels[indlab[h]] = k ; } } } else { newlabels = cluster ; diff --git a/src/funs_cam.cpp b/src/funs_cam.cpp index 75fd7fa..17b6129 100644 --- a/src/funs_cam.cpp +++ b/src/funs_cam.cpp @@ -33,7 +33,7 @@ arma::vec stick_breaking(arma::vec beta_var) } return(out) ; } - + /* older version - to be removed arma::vec stick_breaking(arma::vec beta_var) { @@ -51,7 +51,7 @@ arma::vec stick_breaking(arma::vec beta_var) } return(out) ; } -*/ + */ // This function performs steps 1-4 of Algorithm 1 of Denti et al. (2021): @@ -178,7 +178,7 @@ Rcpp::List weights_update_slice_sampler(const arma::vec& y, const arma::vec& gro // This function performs step 5 of Algorithm 1 of Denti et al. (2021): // sample distributional cluster allocation S_j -arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& group, +arma::vec slicedDP_sample_distr_cluster(const arma::vec& group, arma::vec M_iter, arma::vec pi, arma::mat omega, arma::vec u_D, @@ -193,7 +193,11 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro arma::vec probD(maxK_iter) ; arma::vec out(G) ; double sumdens ; - + //Rcpp::Rcout<< omega; + //Rcpp::Rcout<< "\n-----\n"; + //Rcpp::Rcout<< M_iter; + + // S_j is categorical, with Pr( S_j = k | - ) = I(u_j < xi_k) pi_k/xi_k * ( om_1k^#(M_ij = 1) ... om_Lk^#(M_ij = L) ) // hence the log is: log(pi_k) - log(xi_k) + sum_l #(M_ij = l) * log( om_lk ) for(int j = 0; j < G; j++) @@ -203,7 +207,7 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro for(int k = 0; k < maxK_iter; k++) // I have to compute the prob for each k = 1,..., K_iter { - for(int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; } + for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; } sumdens = arma::accu(mixdens) ; if(!arma::is_finite(sumdens)) { sumdens = log(0) ;} probD(k) = log( pi(k) ) - log( xi(k) ) + sumdens + log(u_D(j) < xi(k)); @@ -217,7 +221,52 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro return(out) ; } - +// This function performs step 5 of Algorithm 1 of Denti et al. (2021): +// sample distributional cluster allocation S_j +arma::vec slicedDP_sample_distr_cluster2(const arma::vec& group, + arma::vec M_iter, + arma::vec pi, arma::mat omega, + arma::vec u_D, + arma::vec xi, + int maxK_iter) +{ + + arma::vec unique_groups = arma::unique(group) ; + int G = unique_groups.n_elem ; + + arma::vec distr_cluster_id = arma::linspace(0, maxK_iter-1, maxK_iter) ; + arma::vec probD(maxK_iter) ; + arma::vec out(G) ; + double sumdens ; + // S_j is categorical, with Pr( S_j = k | - ) = I(u_j < xi_k) pi_k/xi_k * ( om_1k^#(M_ij = 1) ... om_Lk^#(M_ij = L) ) + // hence the log is: log(pi_k) - log(xi_k) + sum_l #(M_ij = l) * log( om_lk ) + for(int j = 0; j < G; j++) + { + arma::uvec ind_group_j = find(group == j) ; // restrict to observations in the j-th population + arma::vec mixdens(ind_group_j.n_elem) ; + probD.zeros(); + for(int k = 0; k < maxK_iter; k++) // I have to compute the prob for each k = 1,..., K_iter + { + for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; } + sumdens = arma::accu(mixdens) ; + if(!arma::is_finite(sumdens)) { sumdens = log(0) ;} + probD(k) = log( pi(k) ) - log( xi(k) ) + sumdens + log(u_D(j) < xi(k)); + } + double tmp = max(probD) ; + // sanity check + arma::uvec check = find( (probD) == -arma::math::inf() ); + int cne = check.n_elem; + if(cne==maxK_iter){ + Rcpp::Rcout<<"*"; + probD.fill(1.0/maxK_iter); + }else{ + probD = exp(probD - tmp) ; // removed useless for loop + } + out(j) = sample_i(distr_cluster_id, probD) ; + } + + return(out) ; +} // This function performs step 6 of Algorithm 1 of Denti et al. (2021): // sample observational cluster allocation M_ij @@ -301,8 +350,7 @@ double sample_beta(double hyp_beta1, double hyp_beta2, bar_Ms(k) = subM.max() ; for(int l = 0; l < subM.max(); l++) { - arma::uvec ind_clusterO_l = find(subM == l) ; - if(ind_clusterO_l.n_elem > 0) { sum_log_mbeta = sum_log_mbeta + log( 1-obs_beta_rv(l,k) ) ; } + sum_log_mbeta = sum_log_mbeta + log( 1-obs_beta_rv(l,k) ) ; } } } diff --git a/src/funs_cam.h b/src/funs_cam.h index 458cc5a..d32eb5d 100644 --- a/src/funs_cam.h +++ b/src/funs_cam.h @@ -16,12 +16,20 @@ Rcpp::List weights_update_slice_sampler(const arma::vec& y, const arma::vec& gro double & alpha, double & beta, int & maxK, int & maxL) ; -arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& group, +arma::vec slicedDP_sample_distr_cluster(const arma::vec& group, arma::vec M_iter, arma::vec pi, arma::mat omega, arma::vec u_D, arma::vec xi, int maxK_iter) ; +// This function performs step 5 of Algorithm 1 of Denti et al. (2021): +// sample distributional cluster allocation S_j +arma::vec slicedDP_sample_distr_cluster2(const arma::vec& group, + arma::vec M_iter, + arma::vec pi, arma::mat omega, + arma::vec u_D, + arma::vec xi, + int maxK_iter); arma::vec slicedDP_sample_obs_cluster(const arma::vec& y, arma::vec clusterD_long, diff --git a/src/funs_fcam.cpp b/src/funs_fcam.cpp index 2ce4e17..f94c44d 100644 --- a/src/funs_fcam.cpp +++ b/src/funs_fcam.cpp @@ -25,7 +25,7 @@ arma::vec fcam_sample_distr_cluster(const arma::vec& y, const arma::vec& group, for(int k = 0; k < K_iter; k++) // I have to compute the prob for each k = 1,..., maxK { mixdens.fill(0) ; - for(int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; } + for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; } sumdens = arma::accu(mixdens) ; if(!arma::is_finite(sumdens)) { sumdens = log(0) ;} probD(k) = log( pi(k) ) + sumdens ; @@ -132,13 +132,13 @@ int fcam_sample_K(int maxK, arma::vec prob2(K_id.n_elem) ; arma::vec prob(K_id.n_elem) ; - for(int k = 0; k < K_id.n_elem ; k++) { + for(unsigned int k = 0; k < K_id.n_elem ; k++) { prob(k) = logpost_maxx( Kplus + k , hyp_max1, hyp_max2, hyp_max3, d_par, cluster, Kplus ) ; } double minprob = prob.min() ; - for(int k = 0; k < K_id.n_elem ; k++) { + for(unsigned int k = 0; k < K_id.n_elem ; k++) { prob2(k) = exp(prob(k) - minprob) ; } diff --git a/src/funs_san.cpp b/src/funs_san.cpp index 1101c13..2f5c11f 100644 --- a/src/funs_san.cpp +++ b/src/funs_san.cpp @@ -91,7 +91,7 @@ arma::vec sample_distr_cluster(const arma::vec& y, const arma::vec& group, for(int k = 0; k < K_iter; k++) // I have to compute the prob for each k = 1,..., maxK { - for(int i = 0; i < ind_group_j.n_elem; i++) + for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = 0 ; for(int l = 0; l < L_iter; l++) { diff --git a/src/main_cam.cpp b/src/main_cam.cpp index b92c091..4634371 100644 --- a/src/main_cam.cpp +++ b/src/main_cam.cpp @@ -133,7 +133,7 @@ Rcpp::List sample_cam_arma(int nrep, // number of replications of the Gibbs samp */ /* update distributional clusters S */ - out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group, + out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, out_M.col(iter), pi.col(iter+1), omega.slice(iter+1), u_D, xi, diff --git a/src/main_ficam.cpp b/src/main_ficam.cpp index 4309d86..e59e597 100644 --- a/src/main_ficam.cpp +++ b/src/main_ficam.cpp @@ -49,8 +49,9 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa sigma2.col(0) = sigma2_start ; out_M.col(0) = M_start ; out_S.col(0) = S_start ; - out_maxK(0) = maxK -1 ; - out_L(0) = maxL -1 ; + out_maxK(0) = maxK; // checked: removed - 1 + out_L(0) = maxL; // checked: removed - 1 + /// out_Lplus(0) = out_M.col(0).max() + 1 ; pi.col(0) = arma::ones(maxK)/maxK ; omega.slice(0) = arma::ones(maxL, maxK) / maxL ; @@ -58,7 +59,7 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa Rcpp::List tmp_obs_cl ; Rcpp::List out_params ; Rcpp::List weights_slice_sampler ; - arma::vec u_D(G, arma::fill::zeros) ; + //arma::vec u_D(G, arma::fill::zeros) ; if(fixed_alpha) { out_alpha.fill(alpha) ; @@ -120,7 +121,7 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter), clusterD_long, out_beta(iter), - out_maxK(iter), out_L(iter), + out_maxK(iter+1), out_L(iter), // checked: added iter + 1 maxK, maxL) ; // pi.col(iter+1) = pi.col(iter) ; @@ -132,7 +133,9 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa * UPDATE CLUSTER ASSIGNMENT */ /* update distributional clusters S */ - out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group, + /// !!!!!! attention, I changed the function here! + // out_S.col(iter+1) = slicedDP_sample_distr_cluster2(group, + out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, out_M.col(iter), pi.col(iter+1), omega.slice(iter+1), u_D, xi, @@ -171,7 +174,7 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa * UPDATE THE NUMBER OF COMPONENTS */ /* update observational clusters out_L */ - out_L(iter + 1) = fcam_sample_K(maxL, + out_L(iter + 1) = fcam_sample_K(maxL -1, // checked: added - 1 out_Lplus(iter+1), 1, 4, 3, out_beta(iter), @@ -183,7 +186,9 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa countL++ ; out_L(iter + 1) = maxL-1 ; } - mu(arma::span(out_L(iter+1), maxL-1), arma::span(iter+1)) = arma::zeros(maxL - out_L(iter+1)) ; + mu(arma::span(out_L(iter+1), maxL-1), + arma::span(iter+1)) = // span e' necessario? + arma::zeros(maxL - out_L(iter+1)) ; sigma2(arma::span(out_L(iter+1), maxL-1), iter+1) = arma::zeros(maxL - out_L(iter+1)) ; // out_L(iter + 1) = out_L(iter) ; diff --git a/src/main_overficam.cpp b/src/main_overficam.cpp index 8d46041..f16bbdb 100644 --- a/src/main_overficam.cpp +++ b/src/main_overficam.cpp @@ -50,7 +50,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb sigma2.col(0) = sigma2_start ; out_M.col(0) = M_start ; out_S.col(0) = S_start ; - out_maxK(0) = maxK ; + out_maxK(0) = maxK ; // qui -1 missing? pi.col(0).fill(1/(maxK*1.0)) ; omega.slice(0).fill(1/(maxL*1.0)) ; @@ -114,7 +114,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter), clusterD_long, out_beta(iter)*maxL, - out_maxK(iter+1), maxL, + out_maxK(iter+1), maxL, // checked, added +1 maxK, maxL) ; /*---------------------------------------------*/ @@ -125,7 +125,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb * UPDATE CLUSTER ASSIGNMENT */ /* update distributional clusters S */ - out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group, + out_S.col(iter+1) = slicedDP_sample_distr_cluster(group, out_M.col(iter), pi.col(iter+1), omega.slice(iter+1), u_D, xi,