Skip to content

Commit

Permalink
update config and Snakefile to reflect new naming
Browse files Browse the repository at this point in the history
  • Loading branch information
agillen committed Jan 30, 2024
1 parent 252bcc1 commit 5b3b9c1
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 36 deletions.
17 changes: 11 additions & 6 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ configfile: "config.yaml"

DATA = config["DATA"]
RESULTS = config["RESULTS"]
PAIRED_SAMPLES = config["PAIRED_SAMPLES"]
R1_SAMPLES = config["R1_SAMPLES"]
R2_SAMPLES = config["R2_SAMPLES"]
STAR_INDEX = config["STAR_INDEX"]
Expand All @@ -14,12 +15,18 @@ FASTQS = config["FASTQS"]
WHITELIST_V2 = config["WHITELIST_V2"]
WHITELIST_V3 = config["WHITELIST_V3"]
STAR = config["STAR"]
READS = ["R1", "R2"]
READS = ["paired", "R1", "R2"]

import json
# Parse the per-chemistry settings table from chemistry.json (one entry per
# chemistry name). The encoding is given explicitly so parsing does not
# depend on the platform's default locale.
with open('chemistry.json', encoding='utf-8') as fp:
    chemistry = json.load(fp)

# paired positional approach: for every paired sample request the counts
# table, the coordinate-sorted BAM and the BED file, in that order.
PAIREDs = [
    target
    for pattern in (
        "{results}/counts/{sample}_{read}_counts.tsv.gz",
        "{results}/{sample}/{sample}_{read}_Aligned.sortedByCoord.out.bam",
        "{results}/bed/{sample}_{read}.bed.gz",
    )
    for target in expand(pattern, results = RESULTS, sample = PAIRED_SAMPLES, read = "paired")
] if PAIRED_SAMPLES else []
# read 1 positional approach
R1s = []
if R1_SAMPLES:
Expand All @@ -33,16 +40,14 @@ if R2_SAMPLES:
R2s.extend(expand("{results}/{sample}/{sample}_{read}_Aligned.sortedByCoord.out.bam", results = RESULTS, sample = R2_SAMPLES, read = "R2"))
R2s.extend(expand("{results}/bed/{sample}_{read}.bed.gz", results = RESULTS, sample = R2_SAMPLES, read = "R2"))
# combine
R12s = R1s + R2s # +expand("{results}/report/multiqc_report.html", results = RESULTS)
R12s = R12s + expand("{data}/{sample}_R1.fastq.gz", data = DATA, sample = R1_SAMPLES) + expand("{data}/{sample}_R2.fastq.gz", data = DATA, sample = R1_SAMPLES)
print(R12s)
R12Ps = PAIREDs + R1s + R2s #+ expand("{results}/report/multiqc_report.html", results = RESULTS)
print(R12Ps)

rule all:
    input:
        # Request the combined target list that includes the paired outputs
        # (PAIREDs + R1s + R2s). The keyword name is kept so the input stays
        # addressable as input.R12s.
        R12s = R12Ps

# Each rule file is included exactly once. rules/download.snake is not
# included because that file was deleted from the repository.
include: "rules/check_versions.snake"
include: "rules/cutadapt_star.snake"
include: "rules/count.snake"
include: "rules/qc.snake"
21 changes: 13 additions & 8 deletions chemistry.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,47 @@
"length": "58",
"bc_cut": "",
"R1": "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]",
"R2": "[WHITELIST_V3,\"--soloUMIlen 12\"]"
"R2": "[WHITELIST_V3,\"--soloUMIlen 12\"]",
"paired": "--alignEndsProtrude 58 ConcordantPair"
},
"chromiumV3UG": {
"length": "58",
"bc_cut": "",
"R1": "[WHITELIST_V3,\"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39\"]",
"R2": "[WHITELIST_V3,\"--soloUMIlen 9\"]"
"R1": "[WHITELIST_V3,\"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39\"]"
},
"chromiumV2": {
"length": "56",
"bc_cut": "",
"R1": "[WHITELIST_V2,\"--soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]",
"R2": "[WHITELIST_V2,\"--soloUMIlen 10\"]"
"R2": "[WHITELIST_V2,\"--soloUMIlen 10\"]",
"paired": "--alignEndsProtrude 56 ConcordantPair"
},
"dropseq": {
"length": "50",
"bc_cut": "",
"R1": "[\"None --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]",
"R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]"
"R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]",
"paired": "--alignEndsProtrude 50 ConcordantPair"
},
"microwellseq": {
"length": "54",
"bc_cut": "CGACTCACTACAGGG...TCGGTGACACGATCG",
"R1": "[\"None --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]",
"R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]"
"R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]",
"paired": "--alignEndsProtrude 54 ConcordantPair"
},
"bd": {
"length": "53",
"bc_cut": "ACTGGCCTGCGA...GGTAGCGGTGACA",
"R1": "[\"None --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]",
"R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]"
"R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]",
"paired": "--alignEndsProtrude 53 ConcordantPair"
},
"indrop": {
"length": "32",
"bc_cut": "",
"R1": "[\"None --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]",
"R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]"
"R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]",
"paired": "--alignEndsProtrude 32 ConcordantPair"
}
}
5 changes: 4 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,15 @@ POLYA_SITES:
FASTQS:
"sample_fastqs.tsv"

R1_SAMPLES:
PAIRED_SAMPLES:
# Sample basenames
- test
- test2
- test3

R1_SAMPLES:
# Sample basenames

R2_SAMPLES:
- test
- test2
Expand Down
10 changes: 9 additions & 1 deletion rules/cutadapt_star.snake
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,18 @@ def _get_chem_version_R2(wildcards):
args = eval(chemistry[chem_version]["R2"])
return args

""" Extract per-capture chemistry from gex libs (paired)"""
""" Extract per-capture chemistry from gex libs (R1/paired)"""
def _get_chem_version_R1(wildcards):
    """Return the evaluated "R1" chemistry entry for the capture named by ``wildcards.sample``.

    The chemistry name is the first unique ``chemistry`` value among the
    SAMPLES_DF rows whose ``capture`` equals ``wildcards.sample``. The "R1"
    entry is stored as a string holding a Python list expression, so it is
    evaluated here; names inside it are resolved at that point.
    """
    is_this_capture = SAMPLES_DF.capture == wildcards.sample
    chem_name = SAMPLES_DF[is_this_capture].chemistry.unique()[0]
    return eval(chemistry[chem_name]["R1"])

""" Extract per-capture extra arguments for gex paired alignments """
def _get_chem_version_paired(wildcards):
chem_version = SAMPLES_DF[SAMPLES_DF.capture == wildcards.sample].chemistry.unique()[0]
args = eval(chemistry[chem_version]["paired"])
return args

""" Extract per-capture extra arguments for gex libs """
def _get_extra_args(wildcards):
captures_filtered = SAMPLES_DF[SAMPLES_DF.capture == wildcards.sample]
Expand Down Expand Up @@ -215,6 +221,7 @@ rule starsolo_paired:
"{results}/{sample}/{sample}_paired_Aligned.sortedByCoord.out.bam"
params:
chemistry = _get_chem_version_R1,
star_args_paired = _get_chem_version_paired,
extra_args = _get_extra_args,
out_dir = "{results}/{sample}/{sample}_paired_",
job_name = "star_paired"
Expand All @@ -232,6 +239,7 @@ rule starsolo_paired:
--readFilesCommand gunzip -c \
--runThreadN 12 \
--soloCBwhitelist {params.chemistry} \
{params.star_args_paired} \
--soloBarcodeMate 1 \
--outFilterMultimapNmax 1 \
--outFilterMismatchNmax 999 \
Expand Down
20 changes: 0 additions & 20 deletions rules/download.snake

This file was deleted.

0 comments on commit 5b3b9c1

Please sign in to comment.