compute the ccdf

sistm · Jun 26, 2024 · 043146a · 043146a
1 parent 26fdf9f
commit 043146a
Showing 1 changed file with 61 additions and 76 deletions.
diff --git a/R/cit_gsa.R b/R/cit_gsa.R
@@ -94,19 +94,19 @@
 #'                            geneset = paste0("Y.", 1:50),
 #'                            test="asymptotic", parallel=FALSE)
 #'res_asymp_unadj$pvals   
-cit_gsa <- function(M,
-                    X,
-                    Z = NULL,
-                    geneset,
-                    test = c("asymptotic","permutation"),
-                    n_perm = 100,
-                    n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
-                    thresholds = c(0.1,0.05,0.01),
-                    parallel = interactive(),
-                    n_cpus = detectCores() - 1,
-                    adaptive = FALSE,
-                    space_y = TRUE,
-                    number_y = 10){
+cit_gsa_CCDF <- function(M,
+                         X,
+                         Z = NULL,
+                         geneset,
+                         test = c("asymptotic","permutation"),
+                         n_perm = 100,
+                         n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
+                         thresholds = c(0.1,0.05,0.01),
+                         parallel = interactive(),
+                         n_cpus = detectCores() - 1,
+                         adaptive = FALSE,
+                         space_y = TRUE,
+                         number_y = 10){
 
   # checks
 
@@ -344,6 +344,7 @@ cit_gsa <- function(M,
 
     n_cpus <- min(length(geneset), n_cpus)
 
+
     res <- pbapply::pblapply(1:length(geneset), function(k){ # 1 -- each list of gene set ----
 
       # Initialisation for each gene in the gene set
@@ -352,7 +353,7 @@ cit_gsa <- function(M,
       indi_pi_gs <- list()
       ccdf_gw <- list()
       ccdf <- list()
-
+      
       measured_genes <- intersect(M_colnames, geneset[[k]])
 
       if(length(measured_genes)<1){ # check 1 : none genes of the current geneset are in M
@@ -389,7 +390,7 @@ cit_gsa <- function(M,
           test_stat <- sum(beta^2) * n_Y_all
 
           test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set 
-
+          
 
           # 2) Pi computation ----
           indi_pi <- matrix(NA, n_Y_all, (p-1)) 
@@ -404,93 +405,77 @@ cit_gsa <- function(M,
           prop_gs[[i]] <- prop # prop for each genes in the gene set 
 
 
-          ccdf_gw[[i]] <- ccdf(Y=Y, X=X, Z=Z, method="OLS", fast=TRUE, space_y=FALSE, number_y=10)
+          ccdf_gw[[i]] <- ccdf(Y=Y, X=X, Z=Z, method="OLS", fast=TRUE, space_y=space_y, number_y=number_y)
+
 
-
         } # ICIIIIIIIIIIII  pour ccdf !!!!!................................................
 
       }
       ccdf[[k]] <- ccdf_gw
-
-
-
-        indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
-        prop_gs_vec <- unlist(prop_gs)
-        n_g_t <- length(prop_gs_vec)
-
-        # 3) Sigma matrix creation ----
-        Sigma2 <- matrix(NA, n_g_t*nrow(H), n_g_t*nrow(H)) 
-        new_prop <- matrix(NA, n_g_t, n_g_t)
-
-
-        n_gs_vec <- nrow(indi_pi_gs_tab)
-        temp <- indi_pi_gs_tab - matrix(prop_gs_vec, nrow=n_gs_vec, ncol=n_g_t, byrow=TRUE)
-
-        for (i in 1:nrow(new_prop)){  # new prop/new pi computation = the one of the gene set, here it's a matrix 
-          new_prop[i, ] <- (temp[, i]%*%temp)/n_gs_vec + prop_gs_vec[i]*prop_gs_vec
-        }
-
-        Sigma2 <- 1/n * tcrossprod(H) %x%  (new_prop - prop_gs_vec %x%  t(prop_gs_vec))
-
-        decomp <- eigen(Sigma2, symmetric=TRUE, only.values=TRUE)
-
-        pval <- survey::pchisqsum(sum(test_stat_gs), lower.tail = FALSE, 
-                                  df = rep(1, ncol(Sigma2)), 
-                                  a =decomp$values , method = "saddlepoint")
-
+      names(ccdf[[k]]) <- measured_genes # not geneset, if some genes are not in the data
+
+
+
+      indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
+      prop_gs_vec <- unlist(prop_gs)
+      n_g_t <- length(prop_gs_vec)
+
+      # 3) Sigma matrix creation ----
+      Sigma2 <- matrix(NA, n_g_t*nrow(H), n_g_t*nrow(H)) 
+      new_prop <- matrix(NA, n_g_t, n_g_t)
+
+
+      n_gs_vec <- nrow(indi_pi_gs_tab)
+      temp <- indi_pi_gs_tab - matrix(prop_gs_vec, nrow=n_gs_vec, ncol=n_g_t, byrow=TRUE)
+
+      for (i in 1:nrow(new_prop)){  # new prop/new pi computation = the one of the gene set, here it's a matrix 
+        new_prop[i, ] <- (temp[, i]%*%temp)/n_gs_vec + prop_gs_vec[i]*prop_gs_vec
+      }
+
+      Sigma2 <- 1/n * tcrossprod(H) %x%  (new_prop - prop_gs_vec %x%  t(prop_gs_vec))
+
+      decomp <- eigen(Sigma2, symmetric=TRUE, only.values=TRUE)
+
+      pval <- survey::pchisqsum(sum(test_stat_gs), lower.tail = FALSE, 
+                                df = rep(1, ncol(Sigma2)), 
+                                a =decomp$values , method = "saddlepoint")
+
+
+
+      return(list("pval" = pval, "test_stat_gs" = test_stat_gs,"ccdf" = ccdf))
 
-
-        return(list("pval" = pval, "test_stat_gs" = test_stat_gs,"ccdf" = ccdf))
-
-
 
-      },
+
+    },
     cl = n_cpus)
     pboptions(type="timer")
-
+
+
 
     pvals <- sapply(res, "[[", "pval")
 
     test_stat_list <- lapply(res, "[[", "test_stat_gs")
-
+    
     ccdf <- lapply(res, "[[", "ccdf")
-
+    ccdf_final <- lapply(seq_along(ccdf[[1]][[1]]), function(i) ccdf[[1]][[1]][[i]])
+    names(ccdf_final) <- names(ccdf[[1]][[1]])
+
   }
-
+  
   df <- data.frame(raw_pval = pvals,
                    adj_pval = p.adjust(pvals, method="BH"),
                    test_statistic = sapply(test_stat_list, sum))
-                                                     
+
   #rownames(df) <- M_colnames
-
+  
 
   output <- list(which_test = test,
                  n_perm = n_perm, 
                  pvals = df, 
-                 ccdf = ccdf)
+                 ccdf = ccdf_final)
 
   class(output) <- "citcdf"
   output$type <- "gsa"
   return(output)
 
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-