Skip to content

Commit

Permalink
syncing to oscar new files
Browse files Browse the repository at this point in the history
  • Loading branch information
Sophia Li committed Mar 12, 2024
1 parent a4a8377 commit cafb476
Show file tree
Hide file tree
Showing 16 changed files with 288 additions and 31 deletions.
13 changes: 7 additions & 6 deletions R/MAPIT.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#' @param variantIndex is a vector containing indices of variants to be included in the computation.

Check warning on line 44 in R/MAPIT.R

View workflow job for this annotation

GitHub Actions / Lint code base

file=/github/workspace/R/MAPIT.R,line=44,col=81,[line_length_linter] Lines should not be more than 80 characters.
#' @param logLevel is a string parameter defining the log level for the logging package.

Check warning on line 45 in R/MAPIT.R

View workflow job for this annotation

GitHub Actions / Lint code base

file=/github/workspace/R/MAPIT.R,line=45,col=81,[line_length_linter] Lines should not be more than 80 characters.
#' @param logFile is a string parameter defining the name of the log file for the logging output. Default is stdout.
#' @param skipProjection is a boolean parameter that toggles projections
#'
#' @return A list of P values and PVEs
#' @examples
Expand Down Expand Up @@ -71,7 +72,7 @@
#' @import Rcpp
mvmapit <- function(
X, Y, Z = NULL, C = NULL, threshold = 0.05, accuracy = 1e-08, test = c("normal", "davies", "hybrid"),
cores = 1, variantIndex = NULL, logLevel = "WARN", logFile = NULL
cores = 1, variantIndex = NULL, logLevel = "WARN", logFile = NULL, skipProjection = FALSE
) {

test <- match.arg(test)
Expand Down Expand Up @@ -121,7 +122,7 @@ mvmapit <- function(
variance_components_ind <- get_variance_components_ind(Y)
if (test == "hybrid") {
log$info("Running normal C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL) # Normal Z-Test
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL) # Normal Z-Test
pvals <- vc.mod$pvalues
# row.names(pvals) <- rownames(X)
pves <- vc.mod$PVE
Expand All @@ -145,19 +146,19 @@ mvmapit <- function(
)

log$info("Running davies C++ routine on selected SNPs.")
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL)
davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals[, variance_components_ind][ind_matrix] <- davies.pvals[, variance_components_ind][ind_matrix]
} else if (test == "normal") {
log$info("Running normal C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL)
pvals <- vc.mod$pvalues
pves <- vc.mod$PVE
timings <- vc.mod$timings
} else {
ind <- ifelse(variantIndex, variantIndex, 1:nrow(X))
log$info("Running davies C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL)
pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals <- set_covariance_components(variance_components_ind, pvals)
pves <- vc.mod$PVE
Expand Down Expand Up @@ -188,7 +189,7 @@ mvmapit <- function(
tidyr::pivot_longer(cols = !id,
names_to = "trait", values_to = "PVE")
duration_ms <- timings_mean
process <- c("cov", "projections", "vectorize", "q", "S", "vc")
process <- c("cov", "projections", "vectorize", "q", "S", "vc") #hereP
timings_mean <- data.frame(process, duration_ms)
return(list(pvalues = pvals, pves = pves, duration = timings_mean))
}
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, GeneticSimilarityMatrix = NULL) {
.Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix)
MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, skipProjection = FALSE, GeneticSimilarityMatrix = NULL) {
.Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix)
}

10 changes: 8 additions & 2 deletions R/simulate_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@
#' @import parallel
#' @importFrom utils head
#' @importFrom stats var cor sd complete.cases
# simulate_traits <- function(
# genotype_matrix, n_causal = 1000, n_trait_specific = 10, n_pleiotropic = 10,
# H2 = 0.6, d = 2, rho = 0.8, marginal_correlation = 0.3, epistatic_correlation = 0.3,
# group_ratio_trait = 1, group_ratio_pleiotropic = 1, maf_threshold = 0.01, seed = 67132,
# logLevel = "INFO", logFile = NULL
# ) {
simulate_traits <- function(
genotype_matrix, n_causal = 1000, n_trait_specific = 10, n_pleiotropic = 10,
H2 = 0.6, d = 2, rho = 0.8, marginal_correlation = 0.3, epistatic_correlation = 0.3,
genotype_matrix, n_causal = 1000, n_trait_specific = 0, n_pleiotropic = 0,
H2 = 0.6, d = 1, rho = 1, marginal_correlation = 0, epistatic_correlation = 0,
group_ratio_trait = 1, group_ratio_pleiotropic = 1, maf_threshold = 0.01, seed = 67132,
logLevel = "INFO", logFile = NULL
) {
Expand Down
Binary file added data/genotype_data.rda
Binary file not shown.
29 changes: 29 additions & 0 deletions qqplot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
library(dplyr)
library(ggplot2)
library(qqplotr)

# Read the file into a tibble
df <- mvmapit_normal_null$pvalues
dp <- list(rate=log(10))
di <- "exp"
de <- FALSE # enabling the detrend option

# TODO: change causal snp facet labels
gg <- df %>% ggplot(mapping = aes(
sample = -log10(p)
)) +
theme_bw() +
stat_qq_band(distribution = di,
dparams = dp,
detrend = de,
alpha = 0.5) +
stat_qq_line(distribution = di, dparams = dp, detrend = de) +
stat_qq_point(distribution = di, dparams = dp, detrend = de) +
theme(legend.position = "none") +
labs(x = bquote("Theoretical Quantiles " -log[10](p)),
y = bquote("Sample Quantiles " -log[10](p)))

# # TODO: do we need to produce a different file format?
# ggsave(paste0(args$file_path, ".png"), plot = gg, width = 6, height = 4, dpi = 300)

#null just combine all data --> a lot of data points
90 changes: 90 additions & 0 deletions repeated_trials.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
library(tidyr)
library(tidyverse)
library(dplyr)
library(mvMAPIT)
library(ggplot2)

# for alternative data -- need to toggle to find a way to run for null data or could write separate script

mvmapit_simulation <- function(data_type = c("null", "alternative")) {

df <- NULL
if (data_type == "null"){
n_pleiotropic = 0
n_trait_specific = 0
print("null")
} else {
n_pleiotropic = 10
n_trait_specific = 10
print("alternative")
}

for (x in 1:5) {

simulated_data <- simulate_traits(
genotype_data,
n_causal = 1000,
n_trait_specific,
n_pleiotropic,
H2 = 0.6, #error = 1-H^2
d = 1,
rho = 0.2, #decrease number means easier to find snp
marginal_correlation = 0.2,
epistatic_correlation = 0.2,
group_ratio_trait = 1,
group_ratio_pleiotropic = 1,
maf_threshold = 0.01,
seed = sample(c(1:1e6), 1), #random generate
logLevel = "INFO",
logFile = NULL
)

#run on smaller data set -- to test
#might want to randomize columns
mvmapit_normal <- mvmapit(
t(simulated_data$genotype[1:100, 1:100]),
t(simulated_data$trait[1:100, ]),
test = "normal", #could add into method inputs the type of test we want to run - matcharg
skipProjection = FALSE
)

mvmapit_normal2 <- mvmapit(
t(simulated_data$genotype[1:100, 1:100]),
t(simulated_data$trait[1:100, ]),
test = "normal",
skipProjection = TRUE
)

#store data in tidyr where p-value lists are diff for each trial
df <- rbind(df, mvmapit_normal$pvalues %>% mutate(trial = x, projections = "False"))
df <- rbind(df, mvmapit_normal2$pvalues %>% mutate(trial = x, projections = "True"))
}

#compare p-values af entire


#expectation under null -- pvalue uniform distribution (compare between proj and not)
dp <- list(rate=log(10))
di <- "exp"
de <- FALSE # enabling the detrend option

# TODO: change causal snp facet labels
gg <- df %>% ggplot(mapping = aes(
sample = -log10(p)
)) +
theme_bw() +
stat_qq_band(distribution = di,
dparams = dp,
detrend = de,
alpha = 0.5) +
stat_qq_line(distribution = di, dparams = dp, detrend = de) +
stat_qq_point(distribution = di, dparams = dp, detrend = de) +
theme(legend.position = "none") +
labs(x = bquote("Theoretical Quantiles " -log[10](p)),
y = bquote("Sample Quantiles " -log[10](p))) +
facet_wrap(~projections) # check this one
return(gg)
}

gg <- mvmapit_simulation("null")
show(gg)
34 changes: 21 additions & 13 deletions src/MAPIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ Rcpp::List MAPITCpp(
Rcpp::Nullable<Rcpp::NumericMatrix> Z = R_NilValue,
Rcpp::Nullable<Rcpp::NumericMatrix> C = R_NilValue,
Rcpp::Nullable<Rcpp::NumericVector> variantIndices = R_NilValue,
std::string testMethod = "normal", int cores = 1,
std::string testMethod = "normal", int cores = 1, bool SKIPPROJECTIONS = false,
Rcpp::Nullable<Rcpp::NumericMatrix> GeneticSimilarityMatrix = R_NilValue) {
int i;
const int n = X.n_cols;
Expand Down Expand Up @@ -161,26 +161,34 @@ Rcpp::List MAPITCpp(
logger->info("Dimensions of polygenic background: {} x {}.", K.n_cols,
K.n_rows);
#endif

// Transform K and G using projection M
start = steady_clock::now();
arma::mat b = arma::zeros(n, z + 2);
arma::mat Yc = Y;
arma::mat b = arma::zeros(n, z + 2);

b.col(0) = arma::ones<arma::vec>(n);
if (z > 0) {
b.cols(1, z) = Zz.t();
b.cols(1, z) = Zz.t();
}
b.col(z + 1) = arma::trans(x_k);

M = compute_projection_matrix(n, b);
K = project_matrix(K, b);
G = project_matrix(G, b);

if (C.isNotNull()) {
Cc = project_matrix(Cc, b);

if(SKIPPROJECTIONS == false) {

K = project_matrix(K, b);
G = project_matrix(G, b);

if (C.isNotNull()) {
Cc = project_matrix(Cc, b);
}
b.reset();

Yc = Y * M; //hereP - Yc called a lot in other lines
} else {
Yc = Y;
}
b.reset();

const arma::mat Yc = Y * M;

end = steady_clock::now();
execution_t(i, 1) = duration_cast<milliseconds>(end - start).count();

Expand Down
14 changes: 7 additions & 7 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// REVIEW CHANGES TO THIS FILE CAREFULLY: causes LTO issues on CRAN
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

Expand All @@ -13,8 +12,8 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// MAPITCpp
Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable<Rcpp::NumericMatrix> Z, Rcpp::Nullable<Rcpp::NumericMatrix> C, Rcpp::Nullable<Rcpp::NumericVector> variantIndices, std::string testMethod, int cores, Rcpp::Nullable<Rcpp::NumericMatrix> GeneticSimilarityMatrix);
RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP GeneticSimilarityMatrixSEXP) {
Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable<Rcpp::NumericMatrix> Z, Rcpp::Nullable<Rcpp::NumericMatrix> C, Rcpp::Nullable<Rcpp::NumericVector> variantIndices, std::string testMethod, int cores, bool skipProjection, Rcpp::Nullable<Rcpp::NumericMatrix> GeneticSimilarityMatrix);
RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP skipProjectionSEXP, SEXP GeneticSimilarityMatrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -25,17 +24,18 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericVector> >::type variantIndices(variantIndicesSEXP);
Rcpp::traits::input_parameter< std::string >::type testMethod(testMethodSEXP);
Rcpp::traits::input_parameter< int >::type cores(coresSEXP);
Rcpp::traits::input_parameter< bool >::type skipProjection(skipProjectionSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericMatrix> >::type GeneticSimilarityMatrix(GeneticSimilarityMatrixSEXP);
rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix));
rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix));
return rcpp_result_gen;
END_RCPP
}

RcppExport SEXP run_testthat_tests(SEXP use_xml_sxp);
RcppExport SEXP run_testthat_tests(void);

static const R_CallMethodDef CallEntries[] = {
{"_mvMAPIT_MAPITCpp", (DL_FUNC) &_mvMAPIT_MAPITCpp, 8},
{"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 1},
{"_mvMAPIT_MAPITCpp", (DL_FUNC) &_mvMAPIT_MAPITCpp, 9},
{"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 0},
{NULL, NULL, 0}
};

Expand Down
Binary file added testing_sophia/alt_data.rds
Binary file not shown.
Binary file added testing_sophia/null_data.rds
Binary file not shown.
27 changes: 27 additions & 0 deletions testing_sophia/qqplot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
library(dplyr)
library(ggplot2)
library(qqplotr)

# Read the file into a tibble
df <- mvmapit_normal_null$pvalues
dp <- list(rate=log(10))
di <- "exp"
de <- FALSE # enabling the detrend option

# TODO: change causal snp facet labels
gg <- df %>% ggplot(mapping = aes(
sample = -log10(p)
)) +
theme_bw() +
stat_qq_band(distribution = di,
dparams = dp,
detrend = de,
alpha = 0.5) +
stat_qq_line(distribution = di, dparams = dp, detrend = de) +
stat_qq_point(distribution = di, dparams = dp, detrend = de) +
theme(legend.position = "none") +
labs(x = bquote("Theoretical Quantiles " -log[10](p)),
y = bquote("Sample Quantiles " -log[10](p)))

# # TODO: do we need to produce a different file format?
# ggsave(paste0(args$file_path, ".png"), plot = gg, width = 6, height = 4, dpi = 300)
59 changes: 59 additions & 0 deletions testing_sophia/repeated_trials.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Setup a trial specification using a binary, binomially
# distributed, undesirable outcome
binom_trial <- setup_trial_binom(
arms = c("Arm A", "Arm B", "Arm C"),
true_ys = c(0.25, 0.20, 0.30),
# Minimum allocation of 15% in all arms
min_probs = rep(0.15, 3),
data_looks = seq(from = 300, to = 2000, by = 100),
# Stop for equivalence if > 90% probability of
# absolute differences < 5 percentage points
equivalence_prob = 0.9,
equivalence_diff = 0.05,
soften_power = 0.5 # Limit extreme allocation ratios
)


# Run 10 simulations with a specified random base seed
res <- run_trials(binom_trial, n_rep = 10, base_seed = 12345) #how to implement simulation and ohter code??
# in the mix?

# See ?extract_results, ?check_performance, ?summary and ?print for details
# on extracting resutls, summarising and printing

p-values <- list()

for (x in 1:100) {
file_name <- alt_data + str(x)
alt_data <- simulate_traits(
genotype_data,
n_causal = 1000,
n_trait_specific = runif(n=1, min=1, max=20), #random
n_pleiotropic = 0,
H2 = 0.6,
d = 1,
rho = runif(1), #random
marginal_correlation = 0.2,
epistatic_correlation = 0.2,
group_ratio_trait = 1,
group_ratio_pleiotropic = 1,
maf_threshold = 0.01,
seed = 67132,
logLevel = "INFO",
logFile = NULL
)

# Save an object to a file
saveRDS(alt_data, file = file_name + ".rds")
# Restore the object
my_data <- readRDS(file = "~/mvMAPIT/" + file_name + ".rds")

mvmapit_normal_alt <- mvmapit(
t(null_data$genotype),
t(null_data$trait),
test = "normal"
)
fisher <- fishers_combined(mvmapit_normal_alt$pvalues) #do we need this part


}
Loading

0 comments on commit cafb476

Please sign in to comment.