Skip to content

Commit

Permalink
parallel asymptotic gsa
Browse files Browse the repository at this point in the history
  • Loading branch information
borishejblum committed Jan 24, 2024
1 parent db1fa3d commit 634de54
Showing 1 changed file with 56 additions and 28 deletions.
84 changes: 56 additions & 28 deletions R/cit_gsa.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,27 @@
#' @export
#'
#' @examples
#' #TO DO
#' set.seed(123)
#'n <- 500
#'r <- 200
#'Z1 <- rnorm(n)
#'Z2 <- rnorm(n)#rbinom(n, size=1, prob=0.5) + rnorm(n, sd=0.05)
#'X1 <- Z2 + rnorm(n, sd=0.2)
#'X2 <- rnorm(n)
#'cor(X1, Z2)
#'Y <- replicate(r, Z2) + rnorm(n*r, 0, 0.5)
#'range(cor(Y, Z2))
#'range(cor(Y, X2))
#'res_asymp_unadj <- cit_gsa(M = data.frame(Y=Y),
#' X = data.frame(X2=X1),
#' geneset = paste0("Y.", 1:50),
#' test="asymptotic", parallel=FALSE)
#' res_asymp_unadj$pvals
#'
cit_gsa <- function(M,
X,
Z = NULL,
geneset,
test = c("asymptotic","permutation"),
n_perm = 100,
n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
Expand All @@ -90,8 +107,7 @@ cit_gsa <- function(M,
n_cpus = detectCores() - 1,
adaptive = FALSE,
space_y = TRUE,
number_y = 10,
geneset){
number_y = 10){

# checks

Expand Down Expand Up @@ -310,43 +326,51 @@ cit_gsa <- function(M,


# Initialisation for each gene set
test_stat_list <- list()
Sigma2_list <- list()
decomp_list <- list()
pval <- NA
#test_stat_list <- list()
#Sigma2_list <- list()
#decomp_list <- list()
#pval <- NA


n_Y_all <- nrow(M)
H <- n_Y_all*(solve(crossprod(modelmat)) %*% t(modelmat))[indexes_X, , drop=FALSE]
# length of Y, same for each genes because X and Y are the same

if(length(geneset)<3){
pboptions(type="none")
}


n_cpus <- min(length(geneset), n_cpus)

for (k in 1:length(geneset)){ # each list of gene set
res <- pbapply::pblapply(1:length(geneset), function(k){ # each list of gene set


# CHECK 1 : stop if no M column ids match all genes ids in the gene sets
measured_genes <- intersect(M_colnames, geneset[[k]]) # if true, displays message

# Initialisation for each gene in the gene set
test_stat_gs <- NULL
prop_gs <- list()
indi_pi_gs <- list()

# CHECK 1 : stop if no M column ids match all genes ids in the gene sets
measured_genes <- intersect(M_colnames, geneset[[k]]) # if true, displays message

if(length(measured_genes)<1){

warning("0 genes from geneset ", k, " observed in expression data")
pval[k] <- NA
test_stat_list[[k]] <- NA
pval <- NA
test_stat_gs <- NA

}else{
test_stat_gs <- numeric(length(measured_genes))

if(length(measured_genes) < length( geneset[[k]])){
if(length(measured_genes) < length(geneset[[k]])){
warning(" Some genes from geneset ", k, " are not observed in expression data")
}
# code below if all the genes in the gene set k are in M, else pval + stat de test = NA


for (i in measured_genes){ # each gene in the gene set k
for (i in 1:length(measured_genes)){ # each gene in the gene set k

Y <- M[, i]
Y <- M[, measured_genes[i]]


# 1) Test statistic computation ----
Expand Down Expand Up @@ -381,7 +405,6 @@ cit_gsa <- function(M,
}
}

test_stat_list[[k]] <- test_stat_gs
indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
prop_gs_vec <- unlist(prop_gs)
n_g_t <- length(prop_gs_vec)
Expand All @@ -400,21 +423,26 @@ cit_gsa <- function(M,

Sigma2 <- 1/n * tcrossprod(H) %x% (new_prop - prop_gs_vec %x% t(prop_gs_vec))

decomp <- eigen(Sigma2, symmetric=TRUE, only.values=TRUE)


Sigma2_list[[k]] <- Sigma2

decomp_list[[k]] <- eigen(Sigma2_list[[k]], symmetric=TRUE, only.values=TRUE)
pval[k] <- survey::pchisqsum(sum(test_stat_list[[k]]), lower.tail = FALSE, df = rep(1, ncol(Sigma2_list[[k]])),a =decomp_list[[k]]$values , method = "saddlepoint")
}
pval <- survey::pchisqsum(sum(test_stat_gs), lower.tail = FALSE,
df = rep(1, ncol(Sigma2)),
a =decomp$values , method = "saddlepoint")
return(list("pval" = pval, "test_stat_gs" = test_stat_gs))
},
cl = n_cpus)
pboptions(type="timer")

}

pvals <- sapply(res, "[[", "pval")
test_stat_list <- lapply(res, "[[", "test_stat_gs")

df <- data.frame(raw_pval = pval,
adj_pval = p.adjust(pval, method="BH"),
test_statistic = unlist(lapply(test_stat_list, sum)))

df <- data.frame(raw_pval = pvals,
adj_pval = p.adjust(pvals, method="BH"),
test_statistic = sapply(test_stat_list, sum)
)

#rownames(df) <- M_colnames

Expand Down

0 comments on commit 634de54

Please sign in to comment.