-
-

Fine-mapping with SuSiE RSS model#

-

This notebook take a list of LD reference files and a list of sumstat files from various association studies …

+
+

High-dimensional regression with summary statistics#

+

This notebook take a list of LD reference files and a list of sumstat files from various association studies, and perform:

+
    +
  1. Fine-mapping with SuSiE RSS model

  2. +
  3. TWAS / PRS weights …

  4. +

Input#

I. GWAS Summary Statistics Files

@@ -584,14 +588,14 @@

Output

Each rds file is accompanied by 2 tsv files reporting elements 8 and 9 in tsv format for the sake of convenience.

-
-

MWE#

+
+

Minimal working example#

-
sos run xqtl-pipeline/pipeline/SuSiE_RSS.ipynb SuSiE_RSS \
+
sos run xqtl-pipeline/pipeline/rss_analysis.ipynb univariate_rss \
     --ld-meta-data ADSP_R4_EUR.LD.list \
     --gwas-meta-data AD_sumstat_list.txt \
-    --impute \
+    --impute --qc-method dentist --finemapping-method susie_rss \
     --container oras://ghcr.io/cumc/pecotmr_apptainer:latest
 
@@ -773,12 +777,13 @@

MWE#

-
[SuSiE_RSS_1]
+
[univariate_rss]
 parameter: L = 5
 parameter: max_L = 10
 # If available the column that indicates sample size within the sumstats
 # filtering threshold for raiss imputation
 parameter: rcond = 0.01
+parameter: lamb = 0.01
 parameter: R2_threshold = 0.6
 parameter: l_step = 5
 parameter: pip_cutoff = 0.025
@@ -787,17 +792,17 @@ 

MWE# parameter: coverage = [0.95, 0.7, 0.5] # Whether to impute the sumstat for all the snp in LD but not in sumstat. parameter: impute = False -parameter: QC = False -parameter: bayesian_conditional_analysis = False +# summary stats QC methods: rss_qc, dentist, slalom +parameter: qc_method = "" +# analysis method: single_effect, susie_rss, bayesian_conditional_regression +parameter: finemapping_method = "single_effect" depends: sos_variable("regional_data") regions = list(regional_data['regions'].keys()) input: for_each = "regions" -output: f'{cwd:a}/{step_name[:-2]}/{_regions.replace(":", "_")}.susie_rss.rds' +output: f'{cwd:a}/{step_name}/{_regions.replace(":", "_")}.{"%s" % qc_method.upper() if qc_method else "noQC"}{"_RAISS_imputed" if impute else ""}.univariate{"_%s" % finemapping_method if finemapping_method else ""}.rds' task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}' R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container, entrypoint = entrypoint - library(susieR) library(pecotmr) - library(dplyr) library(data.table) skip_region = c(${', '.join(['"{}"'.format(item) for item in skip_regions])}) studies = c(${', '.join(['"{}"'.format(item) for item in regional_data["GWAS"].keys()])}) @@ -806,39 +811,25 @@

MWE# n_samples = c(${paths([regional_data['GWAS'][x][regional_data['regions'][_regions][0]][2] for x in regional_data["GWAS"].keys()]):r,}) n_cases = c(${paths([regional_data['GWAS'][x][regional_data['regions'][_regions][0]][3] for x in regional_data["GWAS"].keys()]):r,}) n_controls = c(${paths([regional_data['GWAS'][x][regional_data['regions'][_regions][0]][4] for x in regional_data["GWAS"].keys()]):r,}) - LD_data = load_LD_matrix("${ld_meta_data}", '${"chr%s:%s-%s" % (regional_data['regions'][_regions][0], regional_data['regions'][_regions][1], regional_data['regions'][_regions][2])}') - run_rss_pipeline <- function(sumstat_path, column_file_path, LD_data, n_sample, n_case, n_control, skip_region) { - rss_input = get_rss_input(sumstat_path = sumstat_path, column_file_path = column_file_path, n_sample = n_sample, n_case = n_case, n_control = n_control) - sumstats = rss_input$sumstats - n = rss_input$n - var_y = rss_input$var_y - input_processed = rss_input_preprocess(sumstats = sumstats, LD_data = LD_data, skip_region = skip_region) - - - L = ${L} - final_result = susie_rss_pipeline(input_processed, R = LD_data$combined_LD_matrix, ref_panel = LD_data$ref_panel, n = n, L = L, - var_y = var_y, QC = ${"TRUE" if QC else "FALSE"}, - impute = ${"TRUE" if impute else "FALSE"}, - bayesian_conditional_analysis = ${"TRUE" if bayesian_conditional_analysis else "FALSE"}, - rcond = ${rcond}, R2_threshold = ${R2_threshold}, minimum_ld = ${minimum_ld}, max_L = ${max_L}, l_step = ${l_step}, - coverage = ${coverage[0]}, secondary_coverage = c(${",".join([str(x) for x in coverage[1:]])}), pip_cutoff_to_skip = ${skip_analysis_pip_cutoff}, signal_cutoff = ${pip_cutoff}) - final_result$sumstats_allele_qc_only = input_processed - return(final_result) - } - + LD_data = load_LD_matrix("${ld_meta_data}", '${"chr%s:%s-%s" % (regional_data['regions'][_regions][0], regional_data['regions'][_regions][1], regional_data['regions'][_regions][2])}') res = setNames(replicate(length(studies), list(), simplify = FALSE), studies) for (r in 1:length(res)) { tryCatch({ - res[[r]] = run_rss_pipeline(sumstat_path = sumstat_paths[r], column_file_path = column_file_paths[r], + res[[r]] = rss_analysis_pipeline(sumstat_path = sumstat_paths[r], column_file_path = column_file_paths[r], LD_data = LD_data, n_sample = as.numeric(n_samples[r]), n_case = as.numeric(n_cases[r]), - n_control = as.numeric(n_controls[r]), skip_region = skip_region) - write.table(res[[r]]$sumstats_allele_qc_only, sub("studyname",studies[r],"${_output:n}.studyname.sumstats_allele_QCed.tsv"), sep = "\t", col.names=TRUE, row.names=FALSE, quote=FALSE) - if(${"TRUE" if QC else "FALSE"} & ${"TRUE" if impute else "FALSE"}){ - write.table(res[[r]]$sumstats_qc_impute, sub("studyname",studies[r],"${_output:n}.studyname.sumstats_QC_imputed.tsv"), sep = "\t", col.names=TRUE, row.names=FALSE, quote=FALSE) - } + n_control = as.numeric(n_controls[r]), skip_region = skip_region, + qc_method = ${"NULL" if len(qc_method) == 0 else "'%s'" % qc_method}, + impute = ${"TRUE" if impute else "FALSE"}, + impute_opts = list(rcond = ${rcond}, R2_threshold = ${R2_threshold}, minimum_ld = ${minimum_ld}, lamb=${lamb}), + finemapping_method = ${"NULL" if len(finemapping_method) == 0 else "'%s'" % finemapping_method}, + finemapping_opts = list(init_L = ${L}, max_L = ${max_L}, l_step = ${l_step}, coverage = c(${",".join([str(x) for x in coverage])}), signal_cutoff = ${pip_cutoff}), + pip_cutoff_to_skip = ${skip_analysis_pip_cutoff}, + ) + fwrite(res[[r]]$sumstats, file = paste0("${_output:nn}.", studies[r], "sumstats.tsv.gz"), sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE, compress = "gzip") }, error = function(e) { res[[r]] = NULL - cat(paste("Error processing file ", studies[r], ": ", conditionMessage(e), "\n"))} + cat(paste("Error processing file ", studies[r], ": ", conditionMessage(e), "\n")) + } ) } saveRDS(res, file = "${_output}") @@ -848,7 +839,7 @@

MWE#

-
[SuSiE_RSS_2]
+
[univariate_plot]
 output: pip_plot = f"{cwd}/{_input:bn}.png"
 task: trunk_workers = 1, trunk_size = job_size, walltime = '12h', mem = '20G', cores = numThreads, tags = f'{step_name}_{_output:bn}'
 R: container=container, expand = "${ }", stderr = f'{_output[0]:n}.stderr', stdout = f'{_output[0]:n}.stdout', entrypoint = entrypoint
@@ -930,7 +921,7 @@ 

MWE#

diff --git a/code/multivariate_genome/MASH/mash_preprocessing.html b/code/multivariate_genome/MASH/mash_preprocessing.html index dbd9de07..83f72087 100644 --- a/code/multivariate_genome/MASH/mash_preprocessing.html +++ b/code/multivariate_genome/MASH/mash_preprocessing.html @@ -256,7 +256,7 @@

Advanced Genome-wide Analysis

@@ -769,7 +769,12 @@

ExampleAdvanced Genome-wide Analysis

diff --git a/code/pecotmr_integration/SuSiE_enloc.html b/code/pecotmr_integration/SuSiE_enloc.html index ddfa99f1..5e9f38a7 100644 --- a/code/pecotmr_integration/SuSiE_enloc.html +++ b/code/pecotmr_integration/SuSiE_enloc.html @@ -254,7 +254,7 @@

Advanced Genome-wide Analysis

@@ -492,6 +492,7 @@

Contents