Skip to content

Commit

Permalink
compute the ccdf
Browse files Browse the repository at this point in the history
  • Loading branch information
sara-fallet committed Jun 26, 2024
1 parent 26fdf9f commit 043146a
Showing 1 changed file with 61 additions and 76 deletions.
137 changes: 61 additions & 76 deletions R/cit_gsa.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,19 @@
#' geneset = paste0("Y.", 1:50),
#' test="asymptotic", parallel=FALSE)
#'res_asymp_unadj$pvals
cit_gsa <- function(M,
X,
Z = NULL,
geneset,
test = c("asymptotic","permutation"),
n_perm = 100,
n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
thresholds = c(0.1,0.05,0.01),
parallel = interactive(),
n_cpus = detectCores() - 1,
adaptive = FALSE,
space_y = TRUE,
number_y = 10){
cit_gsa_CCDF <- function(M,
X,
Z = NULL,
geneset,
test = c("asymptotic","permutation"),
n_perm = 100,
n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
thresholds = c(0.1,0.05,0.01),
parallel = interactive(),
n_cpus = detectCores() - 1,
adaptive = FALSE,
space_y = TRUE,
number_y = 10){

# checks

Expand Down Expand Up @@ -344,6 +344,7 @@ cit_gsa <- function(M,

n_cpus <- min(length(geneset), n_cpus)


res <- pbapply::pblapply(1:length(geneset), function(k){ # 1 -- each list of gene set ----

# Initialisation for each gene in the gene set
Expand All @@ -352,7 +353,7 @@ cit_gsa <- function(M,
indi_pi_gs <- list()
ccdf_gw <- list()
ccdf <- list()

measured_genes <- intersect(M_colnames, geneset[[k]])

if(length(measured_genes)<1){ # check 1 : none genes of the current geneset are in M
Expand Down Expand Up @@ -389,7 +390,7 @@ cit_gsa <- function(M,
test_stat <- sum(beta^2) * n_Y_all

test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set


# 2) Pi computation ----
indi_pi <- matrix(NA, n_Y_all, (p-1))
Expand All @@ -404,93 +405,77 @@ cit_gsa <- function(M,
prop_gs[[i]] <- prop # prop for each genes in the gene set


ccdf_gw[[i]] <- ccdf(Y=Y, X=X, Z=Z, method="OLS", fast=TRUE, space_y=FALSE, number_y=10)
ccdf_gw[[i]] <- ccdf(Y=Y, X=X, Z=Z, method="OLS", fast=TRUE, space_y=space_y, number_y=number_y)



} # ICIIIIIIIIIIII pour ccdf !!!!!................................................

}
ccdf[[k]] <- ccdf_gw



indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
prop_gs_vec <- unlist(prop_gs)
n_g_t <- length(prop_gs_vec)

# 3) Sigma matrix creation ----
Sigma2 <- matrix(NA, n_g_t*nrow(H), n_g_t*nrow(H))
new_prop <- matrix(NA, n_g_t, n_g_t)


n_gs_vec <- nrow(indi_pi_gs_tab)
temp <- indi_pi_gs_tab - matrix(prop_gs_vec, nrow=n_gs_vec, ncol=n_g_t, byrow=TRUE)

for (i in 1:nrow(new_prop)){ # new prop/new pi computation = the one of the gene set, here it's a matrix
new_prop[i, ] <- (temp[, i]%*%temp)/n_gs_vec + prop_gs_vec[i]*prop_gs_vec
}

Sigma2 <- 1/n * tcrossprod(H) %x% (new_prop - prop_gs_vec %x% t(prop_gs_vec))

decomp <- eigen(Sigma2, symmetric=TRUE, only.values=TRUE)

pval <- survey::pchisqsum(sum(test_stat_gs), lower.tail = FALSE,
df = rep(1, ncol(Sigma2)),
a =decomp$values , method = "saddlepoint")

names(ccdf[[k]]) <- measured_genes # not geneset, if some genes are not in the data



indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
prop_gs_vec <- unlist(prop_gs)
n_g_t <- length(prop_gs_vec)

# 3) Sigma matrix creation ----
Sigma2 <- matrix(NA, n_g_t*nrow(H), n_g_t*nrow(H))
new_prop <- matrix(NA, n_g_t, n_g_t)


n_gs_vec <- nrow(indi_pi_gs_tab)
temp <- indi_pi_gs_tab - matrix(prop_gs_vec, nrow=n_gs_vec, ncol=n_g_t, byrow=TRUE)

for (i in 1:nrow(new_prop)){ # new prop/new pi computation = the one of the gene set, here it's a matrix
new_prop[i, ] <- (temp[, i]%*%temp)/n_gs_vec + prop_gs_vec[i]*prop_gs_vec
}

Sigma2 <- 1/n * tcrossprod(H) %x% (new_prop - prop_gs_vec %x% t(prop_gs_vec))

decomp <- eigen(Sigma2, symmetric=TRUE, only.values=TRUE)

pval <- survey::pchisqsum(sum(test_stat_gs), lower.tail = FALSE,
df = rep(1, ncol(Sigma2)),
a =decomp$values , method = "saddlepoint")



return(list("pval" = pval, "test_stat_gs" = test_stat_gs,"ccdf" = ccdf))


return(list("pval" = pval, "test_stat_gs" = test_stat_gs,"ccdf" = ccdf))



},

},
cl = n_cpus)
pboptions(type="timer")




pvals <- sapply(res, "[[", "pval")

test_stat_list <- lapply(res, "[[", "test_stat_gs")

ccdf <- lapply(res, "[[", "ccdf")

ccdf_final <- lapply(seq_along(ccdf[[1]][[1]]), function(i) ccdf[[1]][[1]][[i]])
names(ccdf_final) <- names(ccdf[[1]][[1]])

}

df <- data.frame(raw_pval = pvals,
adj_pval = p.adjust(pvals, method="BH"),
test_statistic = sapply(test_stat_list, sum))

#rownames(df) <- M_colnames


output <- list(which_test = test,
n_perm = n_perm,
pvals = df,
ccdf = ccdf)
ccdf = ccdf_final)

class(output) <- "citcdf"
output$type <- "gsa"
return(output)

}




















0 comments on commit 043146a

Please sign in to comment.