Skip to content

Commit

Permalink
Merge pull request #26 from Phil9S/dev
Browse files Browse the repository at this point in the history
PR - v1.2.0
  • Loading branch information
Phil9S authored May 16, 2024
2 parents b2f4ec5 + 16f9da1 commit f492f43
Show file tree
Hide file tree
Showing 25 changed files with 842 additions and 232 deletions.
221 changes: 155 additions & 66 deletions README.md

Large diffs are not rendered by default.

41 changes: 38 additions & 3 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,54 @@
---
# Sample sheet
samplesheet: "sample_sheet.tsv"
samplesheet: sample_sheet.tsv

# Output location
out_dir: "/mnt/scratcha/fmlab/smith10/britroc/"
out_dir: results/

# Bin sizes
# By default any in [1,5,15,30,50,100,500,1000]
# Add new line for additional bin sizes
bins:
- 30
project_name: "britroc"
#- 100

# Set project dir name
project_name: nm_test

# Pipeline parameters
af_cutoff: 0.15

# Set seed for CBS - TRUE or FALSE
# default TRUE
use_seed: "TRUE"
seed_val: "9999"

# Filter underpowered solutions - TRUE or FALSE
# Default TRUE
filter_underpowered: "TRUE"

# ploidy range
# Default min = 1.6 | max = 8
ploidy_min: 1.6
ploidy_max: 8.0

# purity range (1 >= max > min >= 0)
# Default min = 0.15 | max = 1.00
purity_min: 0.15
purity_max: 1.0

# Homozygous loss filter - TRUE or FALSE
# Default "TRUE"
filter_homozygous: "TRUE"
# Threshold basepairs lost
# Default 10000000 / 10Mbase
homozygous_prop: 10000000
# Absolute CN homozygous loss threshold
# Default 0.4
homozygous_threshold: 0.4

# container url for swgs-absolutecn
image_base_url: docker://phil9s/
# Not implemented
#custom_bin: false
#custom_bin_folder: "/custom_bin_data/"
52 changes: 52 additions & 0 deletions config/default_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
# Sample sheet
samplesheet: sample_sheet.tsv

# Output location
out_dir: results/

# Bin sizes
# By default any in [1,5,15,30,50,100,500,1000]
# Add new line for additional bin sizes
bins:
- 30
#- 100

# Set project dir name
project_name: nm_test

# Pipeline parameters
af_cutoff: 0.15

# Set seed for CBS - TRUE or FALSE
# default TRUE
use_seed: "TRUE"
seed_val: "9999"

# Filter underpowered solutions - TRUE or FALSE
# Default TRUE
filter_underpowered: "TRUE"

# ploidy range
# Default min = 1.6 | max = 8
ploidy_min: 1.6
ploidy_max: 8.0

# purity range (1 >= max > min >= 0)
# Default min = 0.15 | max = 1.00
purity_min: 0.15
purity_max: 1.0

# Homozygous loss filter - TRUE or FALSE
# Default "TRUE"
filter_homozygous: "TRUE"
# Threshold basepairs lost
# Default 10000000 / 10Mbase
homozygous_prop: 10000000
# Absolute CN homozygous loss threshold
# Default 0.4
homozygous_threshold: 0.4

# Not implemented
#custom_bin: false
#custom_bin_folder: "/custom_bin_data/"
49 changes: 49 additions & 0 deletions dev_tools/report_seg_counts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
args <- commandArgs(trailingOnly=T)
library(yaml)

cat("report segments - use 'report_seg_counts.R all' for individual seg counts\n")

config <- read_yaml(file="config/config.yaml")

projectBin <- paste0(config$project_name,"_",config$bin,"kb")
outputLoc <- paste0(config$out_dir,"sWGS_fitting/",projectBin,"/")

pre <- "absolute_PRE_down_sampling/"
post <- "absolute_POST_down_sampling/abs_cn_rds/"

preFile <- paste0(outputLoc,pre,projectBin,"_relSmoothedCN.rds")
postFile <- paste0(outputLoc,post,projectBin,"_ds_absCopyNumber.rds")

verbose <- FALSE
if(length(args) > 0){
if(args[1] == "all"){
verbose <- TRUE
}
}

if(file.exists(preFile)){
suppressMessages(library(QDNAseqmod))
suppressMessages(library(Biobase))
preS <- readRDS(preFile)
preS <- preS[featureData(preS)$use]
preSegs <- apply(assayDataElement(preS,"segmented"),MARGIN=2,function(x) length(rle(x)$lengths))
cat("\nPre-downsampled segments\n")
if(verbose){
print(preSegs)
} else {
print(summary(preSegs))
}
if(file.exists(postFile)){
postS <- readRDS(postFile)
postS <- postS[featureData(postS)$use]
postSegs <- apply(assayDataElement(postS,"segmented"),MARGIN=2,function(x) length(rle(x)$lengths))
cat("\nPost-downsampled segments\n")
if(verbose){
print(postSegs)
} else {
print(summary(postSegs))
}
}
} else {
cat("no pre or post downsampled files found\n")
}
2 changes: 2 additions & 0 deletions rules/bam_check.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ rule check_bam:
bam=FILE_LIST
output:
OUT_DIR+"sWGS_fitting/{project}_{bin}kb/bam.ok"
singularity:
image_base_url+"swgs-absolutecn:latest"
threads: 1
script:
"../scripts/bam_check.R"
18 changes: 17 additions & 1 deletion rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,26 @@ OUT_DIR=os.path.join(OUT_DIR,"")
samplesheet = pd.read_table(config["samplesheet"],dtype={'PATIENT_ID': str,'SAMPLE_ID':str,'TP53freq':float}).set_index(["SAMPLE_ID"], drop=False)
validate(samplesheet, schema="../schemas/samples.schema.yaml")

# set container uri
image_base_url = config["image_base_url"]

#### Check bin values ####

BIN_VALS = config["bins"]
BIN_DEF = [1,5,15,30,50,100,500,1000]

if not set(BIN_VALS).issubset(BIN_DEF):
sys.exit("Some bin values are not available")
sys.exit("Config error - Some specified bin values are not available")

##### CHECK MAX > MIN #####
PLMIN=config["ploidy_min"]
PLMAX=config["ploidy_max"]
PUMIN=config["purity_min"]
PUMAX=config["purity_max"]

if PLMIN > PLMAX:
sys.exit("Config error - Minimum ploidy exceeds or is equal to maximum ploidy")

if PUMIN > PUMAX:
sys.exit("Config error - Minimum purity exceeds or is equal to maximum purity")

3 changes: 3 additions & 0 deletions rules/downsample.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ rule downsample:
rds=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/{project}_{bin}kb_relSmoothedCN.rds"
output:
OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_POST_down_sampling/downsampled_bams/{sample}.bam"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
outdir=OUT_DIR,
prplpu=prplpu,
bin="{bin}",
project="{project}",
sample="{sample}"
Expand Down
6 changes: 5 additions & 1 deletion rules/downsampled_rel_rds.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ rule ds_relRDS:
meta=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/{project}_fit_QC_predownsample.tsv"
output:
OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_POST_down_sampling/relative_cn_rds/{project}_{sample}_{bin}kb_relSmoothedCN.rds"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
outdir=OUT_DIR,
project="{project}",
bin="{bin}"
bin="{bin}",
use_seed=config["use_seed"],
seed_val=config["seed_val"]
script:
"../scripts/qdnaseq_mod_ds.R"
9 changes: 7 additions & 2 deletions rules/filter_gridsearch.smk
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
rule gridsearch_filter:
input:
cl=expand(OUT_DIR+"sWGS_fitting/{{project}}_{{bin}}kb/absolute_PRE_down_sampling/clonality_results/{{project}}_{sample}_clonality.csv",sample=SAMPLES),
cl=expand(OUT_DIR+"sWGS_fitting/{{project}}_{{bin}}kb/absolute_PRE_down_sampling/clonality_results/{{project}}_{sample}_clonality.tsv",sample=SAMPLES),
rds=expand(OUT_DIR+"sWGS_fitting/{{project}}_{{bin}}kb/absolute_PRE_down_sampling/relative_cn_rds/{{project}}_{sample}_{{bin}}kb_relSmoothedCN.rds",sample=SAMPLES)
output:
OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/{project}_fit_QC_predownsample.tsv"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
bin="{bin}",
meta=config["samplesheet"],
project="{project}",
outdir=OUT_DIR,
af_cutoff=config["af_cutoff"]
af_cutoff=config["af_cutoff"],
filter_underpowered=config["filter_underpowered"],
filter_homozygous=config["filter_homozygous"],
homozygous_prop=config["homozygous_prop"]
threads: THREADS
script:
"../scripts/gridsearch_results_filtering.R"
Expand Down
12 changes: 10 additions & 2 deletions rules/gridsearch.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@ rule gridsearch_fitting:
input:
expand(OUT_DIR+"sWGS_fitting/{{project}}_{{bin}}kb/absolute_PRE_down_sampling/relative_cn_rds/{{project}}_{{sample}}_{{bin}}kb_relSmoothedCN.rds")
output:
csv=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/clonality_results/{project}_{sample}_clonality.csv",
tsv=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/clonality_results/{project}_{sample}_clonality.tsv",
pdf=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/clonality_results/{project}_{sample}_clonality.pdf"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
bin="{bin}",
outdir=OUT_DIR,
project="{project}"
project="{project}",
meta=config["samplesheet"],
ploidy_min=config["ploidy_min"],
ploidy_max=config["ploidy_max"],
purity_min=config["purity_min"],
purity_max=config["purity_max"],
homozygous_threshold=config["homozygous_threshold"]
script:
"../scripts/ploidy_purity_search_standard_error.R"
6 changes: 5 additions & 1 deletion rules/rel_rds.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@ rule relRDS:
bams=expand(OUT_DIR+"sWGS_fitting/{{project}}_{{bin}}kb/bams/{{sample}}.bam")
output:
OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_PRE_down_sampling/relative_cn_rds/{project}_{sample}_{bin}kb_relSmoothedCN.rds"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
bin="{bin}",
outdir=OUT_DIR,
project="{project}",
meta=config["samplesheet"]
meta=config["samplesheet"],
use_seed=config["use_seed"],
seed_val=config["seed_val"]
script:
"../scripts/qdnaseq_mod.R"

2 changes: 2 additions & 0 deletions rules/rel_to_abs.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ rule rel_to_abs:
output:
tsv=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_POST_down_sampling/abs_cn_rds/{project}_{bin}kb_ds_abs_fits.tsv",
rds=OUT_DIR+"sWGS_fitting/{project}_{bin}kb/absolute_POST_down_sampling/abs_cn_rds/{project}_{bin}kb_ds_absCopyNumber.rds"
singularity:
image_base_url+"swgs-absolutecn:latest"
params:
outdir=OUT_DIR,
project="{project}",
Expand Down
20 changes: 10 additions & 10 deletions sample_sheet_example.tsv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
PATIENT_ID SAMPLE_ID TP53freq smooth file
PATIENT-1 SAMPLE_3 NA FALSE /data/SAMPLE_3.bam
PATIENT-2 SAMPLE_5 0.97604930362117 FALSE /data/SAMPLE_5.bam
PATIENT-2 SAMPLE_6 0.948429942418426 FALSE /data/SAMPLE_6.bam
PATIENT-3 SAMPLE_10 0.312743806009489 FALSE /data/SAMPLE_10.bam
PATIENT-3 SAMPLE_11 0.313365853658537 FALSE /data/SAMPLE_11.bam
PATIENT-3 SAMPLE_12 0.170947565543071 FALSE /data/SAMPLE_12.bam
PATIENT-3 SAMPLE_13 0.15861669829222 FALSE /data/SAMPLE_13.bam
PATIENT-3 SAMPLE_7 0.326712851405623 FALSE /data/SAMPLE_7.bam
PATIENT-3 SAMPLE_8 0.361060215053763 FALSE /data/SAMPLE_8.bam
PATIENT_ID SAMPLE_ID TP53freq smooth file precPloidy precPurity
PATIENT-1 SAMPLE_3 NA FALSE /data/SAMPLE_3.bam 3.2 0.76
PATIENT-2 SAMPLE_5 0.97 FALSE /data/SAMPLE_5.bam 2.4 NA
PATIENT-2 SAMPLE_6 0.94 FALSE /data/SAMPLE_6.bam NA 0.55
PATIENT-3 SAMPLE_10 0.31 TRUE /data/SAMPLE_10.bam NA NA
PATIENT-3 SAMPLE_11 NA FALSE /data/SAMPLE_11.bam NA NA
PATIENT-3 SAMPLE_12 0.17 FALSE /data/SAMPLE_12.bam NA NA
PATIENT-3 SAMPLE_13 0.15 FALSE /data/SAMPLE_13.bam NA NA
PATIENT-3 SAMPLE_7 0.32 TRUE /data/SAMPLE_7.bam NA NA
PATIENT-3 SAMPLE_8 0.36 FALSE /data/SAMPLE_8.bam NA NA
53 changes: 51 additions & 2 deletions schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,66 @@ properties:
type: string
bins:
type: array
items:
type: number
uniqueItems: true
project_name:
type: string
af_cutoff:
type: number
min: 0
max: 1.0
minimum: 0
maximum: 1.0
use_seed:
type: string
enum: ["TRUE","FALSE"]
seed_val:
type: string
filter_underpowered:
type: string
enum: ["TRUE","FALSE"]
ploidy_min:
type: number
minimum: 1
maximum: 20
ploidy_max:
type: number
minimum: 1
maximum: 20
purity_min:
type: number
minimum: 0
maximum: 1.0
purity_max:
type: number
minimum: 0
maximum: 1.0
filter_homozygous:
type: string
enum: ["TRUE","FALSE"]
homozygous_prop:
type: number
minimum: 0
homozygous_threshold:
type: number
minimum: 0
maximum: 0.99
image_base_url:
type: string

# entries that have to be in the config file for successful validation
required:
- samplesheet
- out_dir
- bins
- project_name
- use_seed
- seed_val
- filter_underpowered
- ploidy_min
- ploidy_max
- purity_min
- purity_max
- filter_homozygous
- homozygous_prop
- homozygous_threshold

Loading

0 comments on commit f492f43

Please sign in to comment.