Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Fradenti committed May 13, 2024
1 parent 6845cc0 commit 4bdc228
Show file tree
Hide file tree
Showing 12 changed files with 96 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SANple
Type: Package
Title: Fitting Shared Atoms Nested Models via Markov Chains Monte Carlo
Version: 0.1.0.9000
Version: 0.1.1
Date: 2023-10-09
Authors@R: c(
person("Laura", "D'Angelo", ,"laura.dangelo@live.com",
Expand Down
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# SANple (development version)

* ...
-futura versione 0.1.1

* Improve efficiency of stick-breaking computation
* Improve initialization of the algorithms
* Changed cpp for loops indexes from int to unsigned int when needed
* ha senso chiamare gli header di tutti ovunque? credo sia questo che renda il pacchetto pesantissimo
* Fixed a bug in the full conditional of the concentration parameter for the observational DP of CAM

# SANple 0.1.0

Expand Down
2 changes: 1 addition & 1 deletion R/mcmc_fiSAN.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ sample_fiSAN <- function(nrep, y, group,

if(is.null(nclus_start)) { nclus_start <- min(c(maxL, 30))}
M_start <- stats::kmeans(y,
centers <- nclus_start,
centers = nclus_start,
algorithm="MacQueen",
iter.max = 50)$cluster

Expand Down
2 changes: 1 addition & 1 deletion src/common_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ arma::vec relabel_arma(arma::vec cluster)
for(int k = 0; k < n_distinct ; k++) {
lab = uniquecl[k] ;
arma::uvec indlab = find(cluster == lab) ;
for(int h = 0; h < indlab.n_elem; h++) { newlabels[indlab[h]] = k ; }
for(unsigned int h = 0; h < indlab.n_elem; h++) { newlabels[indlab[h]] = k ; }
}
} else {
newlabels = cluster ;
Expand Down
64 changes: 56 additions & 8 deletions src/funs_cam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ arma::vec stick_breaking(arma::vec beta_var)
}
return(out) ;
}

/* older version - to be removed
arma::vec stick_breaking(arma::vec beta_var)
{
Expand All @@ -51,7 +51,7 @@ arma::vec stick_breaking(arma::vec beta_var)
}
return(out) ;
}
*/
*/


// This function performs steps 1-4 of Algorithm 1 of Denti et al. (2021):
Expand Down Expand Up @@ -178,7 +178,7 @@ Rcpp::List weights_update_slice_sampler(const arma::vec& y, const arma::vec& gro

// This function performs step 5 of Algorithm 1 of Denti et al. (2021):
// sample distributional cluster allocation S_j
arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& group,
arma::vec slicedDP_sample_distr_cluster(const arma::vec& group,
arma::vec M_iter,
arma::vec pi, arma::mat omega,
arma::vec u_D,
Expand All @@ -193,7 +193,11 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro
arma::vec probD(maxK_iter) ;
arma::vec out(G) ;
double sumdens ;

//Rcpp::Rcout<< omega;
//Rcpp::Rcout<< "\n-----\n";
//Rcpp::Rcout<< M_iter;


// S_j is categorical, with Pr( S_j = k | - ) = I(u_j < xi_k) pi_k/xi_k * ( om_1k^#(M_ij = 1) ... om_Lk^#(M_ij = L) )
// hence the log is: log(pi_k) - log(xi_k) + sum_l #(M_ij = l) * log( om_lk )
for(int j = 0; j < G; j++)
Expand All @@ -203,7 +207,7 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro

for(int k = 0; k < maxK_iter; k++) // I have to compute the prob for each k = 1,..., K_iter
{
for(int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; }
for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; }
sumdens = arma::accu(mixdens) ;
if(!arma::is_finite(sumdens)) { sumdens = log(0) ;}
probD(k) = log( pi(k) ) - log( xi(k) ) + sumdens + log(u_D(j) < xi(k));
Expand All @@ -217,7 +221,52 @@ arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& gro
return(out) ;
}


// This function performs step 5 of Algorithm 1 of Denti et al. (2021):
// sample distributional cluster allocation S_j
arma::vec slicedDP_sample_distr_cluster2(const arma::vec& group,
arma::vec M_iter,
arma::vec pi, arma::mat omega,
arma::vec u_D,
arma::vec xi,
int maxK_iter)
{

arma::vec unique_groups = arma::unique(group) ;
int G = unique_groups.n_elem ;

arma::vec distr_cluster_id = arma::linspace(0, maxK_iter-1, maxK_iter) ;
arma::vec probD(maxK_iter) ;
arma::vec out(G) ;
double sumdens ;
// S_j is categorical, with Pr( S_j = k | - ) = I(u_j < xi_k) pi_k/xi_k * ( om_1k^#(M_ij = 1) ... om_Lk^#(M_ij = L) )
// hence the log is: log(pi_k) - log(xi_k) + sum_l #(M_ij = l) * log( om_lk )
for(int j = 0; j < G; j++)
{
arma::uvec ind_group_j = find(group == j) ; // restrict to observations in the j-th population
arma::vec mixdens(ind_group_j.n_elem) ;
probD.zeros();
for(int k = 0; k < maxK_iter; k++) // I have to compute the prob for each k = 1,..., K_iter
{
for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; }
sumdens = arma::accu(mixdens) ;
if(!arma::is_finite(sumdens)) { sumdens = log(0) ;}
probD(k) = log( pi(k) ) - log( xi(k) ) + sumdens + log(u_D(j) < xi(k));
}
double tmp = max(probD) ;
// sanity check
arma::uvec check = find( (probD) == -arma::math::inf() );
int cne = check.n_elem;
if(cne==maxK_iter){
Rcpp::Rcout<<"*";
probD.fill(1.0/maxK_iter);
}else{
probD = exp(probD - tmp) ; // removed useless for loop
}
out(j) = sample_i(distr_cluster_id, probD) ;
}

return(out) ;
}

// This function performs step 6 of Algorithm 1 of Denti et al. (2021):
// sample observational cluster allocation M_ij
Expand Down Expand Up @@ -301,8 +350,7 @@ double sample_beta(double hyp_beta1, double hyp_beta2,
bar_Ms(k) = subM.max() ;
for(int l = 0; l < subM.max(); l++)
{
arma::uvec ind_clusterO_l = find(subM == l) ;
if(ind_clusterO_l.n_elem > 0) { sum_log_mbeta = sum_log_mbeta + log( 1-obs_beta_rv(l,k) ) ; }
sum_log_mbeta = sum_log_mbeta + log( 1-obs_beta_rv(l,k) ) ;
}
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/funs_cam.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@ Rcpp::List weights_update_slice_sampler(const arma::vec& y, const arma::vec& gro
double & alpha, double & beta,
int & maxK, int & maxL) ;

arma::vec slicedDP_sample_distr_cluster(const arma::vec& y, const arma::vec& group,
arma::vec slicedDP_sample_distr_cluster(const arma::vec& group,
arma::vec M_iter,
arma::vec pi, arma::mat omega,
arma::vec u_D,
arma::vec xi,
int maxK_iter) ;
// This function performs step 5 of Algorithm 1 of Denti et al. (2021):
// sample distributional cluster allocation S_j
arma::vec slicedDP_sample_distr_cluster2(const arma::vec& group,
arma::vec M_iter,
arma::vec pi, arma::mat omega,
arma::vec u_D,
arma::vec xi,
int maxK_iter);

arma::vec slicedDP_sample_obs_cluster(const arma::vec& y,
arma::vec clusterD_long,
Expand Down
6 changes: 3 additions & 3 deletions src/funs_fcam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ arma::vec fcam_sample_distr_cluster(const arma::vec& y, const arma::vec& group,
for(int k = 0; k < K_iter; k++) // I have to compute the prob for each k = 1,..., maxK
{
mixdens.fill(0) ;
for(int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; }
for(unsigned int i = 0; i < ind_group_j.n_elem; i++) { mixdens(i) = log( omega(M_iter(ind_group_j(i)), k) ) ; }
sumdens = arma::accu(mixdens) ;
if(!arma::is_finite(sumdens)) { sumdens = log(0) ;}
probD(k) = log( pi(k) ) + sumdens ;
Expand Down Expand Up @@ -132,13 +132,13 @@ int fcam_sample_K(int maxK,

arma::vec prob2(K_id.n_elem) ;
arma::vec prob(K_id.n_elem) ;
for(int k = 0; k < K_id.n_elem ; k++) {
for(unsigned int k = 0; k < K_id.n_elem ; k++) {
prob(k) = logpost_maxx( Kplus + k ,
hyp_max1, hyp_max2, hyp_max3,
d_par, cluster, Kplus ) ;
}
double minprob = prob.min() ;
for(int k = 0; k < K_id.n_elem ; k++) {
for(unsigned int k = 0; k < K_id.n_elem ; k++) {
prob2(k) = exp(prob(k) - minprob) ;
}

Expand Down
2 changes: 1 addition & 1 deletion src/funs_san.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ arma::vec sample_distr_cluster(const arma::vec& y, const arma::vec& group,

for(int k = 0; k < K_iter; k++) // I have to compute the prob for each k = 1,..., maxK
{
for(int i = 0; i < ind_group_j.n_elem; i++)
for(unsigned int i = 0; i < ind_group_j.n_elem; i++)
{
mixdens(i) = 0 ;
for(int l = 0; l < L_iter; l++) {
Expand Down
2 changes: 1 addition & 1 deletion src/main_cam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ Rcpp::List sample_cam_arma(int nrep, // number of replications of the Gibbs samp
*/

/* update distributional clusters S */
out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group,
out_S.col(iter+1) = slicedDP_sample_distr_cluster(group,
out_M.col(iter),
pi.col(iter+1), omega.slice(iter+1),
u_D, xi,
Expand Down
19 changes: 12 additions & 7 deletions src/main_ficam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa
sigma2.col(0) = sigma2_start ;
out_M.col(0) = M_start ;
out_S.col(0) = S_start ;
out_maxK(0) = maxK -1 ;
out_L(0) = maxL -1 ;
out_maxK(0) = maxK; // checked: removed - 1
out_L(0) = maxL; // checked: removed - 1
///
out_Lplus(0) = out_M.col(0).max() + 1 ;
pi.col(0) = arma::ones(maxK)/maxK ;
omega.slice(0) = arma::ones(maxL, maxK) / maxL ;

Rcpp::List tmp_obs_cl ;
Rcpp::List out_params ;
Rcpp::List weights_slice_sampler ;
arma::vec u_D(G, arma::fill::zeros) ;
//arma::vec u_D(G, arma::fill::zeros) ;

if(fixed_alpha) {
out_alpha.fill(alpha) ;
Expand Down Expand Up @@ -120,7 +121,7 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa
omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter),
clusterD_long,
out_beta(iter),
out_maxK(iter), out_L(iter),
out_maxK(iter+1), out_L(iter), // checked: added iter + 1
maxK, maxL) ;

// pi.col(iter+1) = pi.col(iter) ;
Expand All @@ -132,7 +133,9 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa
* UPDATE CLUSTER ASSIGNMENT
*/
/* update distributional clusters S */
out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group,
/// !!!!!! attention, I changed the function here!
// out_S.col(iter+1) = slicedDP_sample_distr_cluster2(group,
out_S.col(iter+1) = slicedDP_sample_distr_cluster(group,
out_M.col(iter),
pi.col(iter+1), omega.slice(iter+1),
u_D, xi,
Expand Down Expand Up @@ -171,7 +174,7 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa
* UPDATE THE NUMBER OF COMPONENTS
*/
/* update observational clusters out_L */
out_L(iter + 1) = fcam_sample_K(maxL,
out_L(iter + 1) = fcam_sample_K(maxL -1, // checked: added - 1
out_Lplus(iter+1),
1, 4, 3,
out_beta(iter),
Expand All @@ -183,7 +186,9 @@ Rcpp::List sample_ficam_arma(int nrep, // number of replications of the Gibbs sa
countL++ ;
out_L(iter + 1) = maxL-1 ; }

mu(arma::span(out_L(iter+1), maxL-1), arma::span(iter+1)) = arma::zeros(maxL - out_L(iter+1)) ;
mu(arma::span(out_L(iter+1), maxL-1),
arma::span(iter+1)) = // span e' necessario?
arma::zeros(maxL - out_L(iter+1)) ;
sigma2(arma::span(out_L(iter+1), maxL-1), iter+1) = arma::zeros(maxL - out_L(iter+1)) ;

// out_L(iter + 1) = out_L(iter) ;
Expand Down
6 changes: 3 additions & 3 deletions src/main_overficam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb
sigma2.col(0) = sigma2_start ;
out_M.col(0) = M_start ;
out_S.col(0) = S_start ;
out_maxK(0) = maxK ;
out_maxK(0) = maxK ; // qui -1 missing?
pi.col(0).fill(1/(maxK*1.0)) ;
omega.slice(0).fill(1/(maxL*1.0)) ;

Expand Down Expand Up @@ -114,7 +114,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb
omega.slice(iter+1) = dirichlet_sample_obs_weights(out_M.col(iter),
clusterD_long,
out_beta(iter)*maxL,
out_maxK(iter+1), maxL,
out_maxK(iter+1), maxL, // checked, added +1
maxK, maxL) ;

/*---------------------------------------------*/
Expand All @@ -125,7 +125,7 @@ Rcpp::List sample_overficam_arma(int nrep, // number of replications of the Gibb
* UPDATE CLUSTER ASSIGNMENT
*/
/* update distributional clusters S */
out_S.col(iter+1) = slicedDP_sample_distr_cluster(y, group,
out_S.col(iter+1) = slicedDP_sample_distr_cluster(group,
out_M.col(iter),
pi.col(iter+1), omega.slice(iter+1),
u_D, xi,
Expand Down

0 comments on commit 4bdc228

Please sign in to comment.