From 4dee2bba66d584f9eaa50a44ebfc0604f100d96a Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 13 Oct 2014 13:46:22 +0100 Subject: [PATCH 1/6] add fixmates and adddupmarksupport to the coordinate bamsort command in final_output_prep --- data/vtlib/final_output_prep.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 68834db79..3d1610471 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -25,6 +25,9 @@ "bamsort", "SO=coordinate", "level=0", + "verbose=0", + "fixmates=1", + "adddupmarksupport=1", {"subst_param_name":"bs_tmpfile_flag", "required":"no", "subst_constructor":{ From b0e37a612d91d73c7ae6e9b16f11151eba767d50 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 13 Oct 2014 13:50:31 +0100 Subject: [PATCH 2/6] upgrade bammarkduplicates command to bamstreamingmarkduplicates --- data/vtlib/final_output_prep.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 3d1610471..0766b8df5 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -57,8 +57,9 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bammarkduplicates", + "bamstreamingmarkduplicates", "level=0", + "verbose=0", {"subst_param_name":"bmd_tmpfile_flag", "required":"no", "subst_constructor":{ From 083722442057814ba2dd4d0552f881fced67ff59 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 13 Oct 2014 16:41:49 +0100 Subject: [PATCH 3/6] made parameter names consistent between alignment and realignment templates (vtlib) correct template names in README.vtlib remove superfluous file --- data/vtlib/README.vtlib | 6 +- .../vtlib/alignment_wtsi_stage2_template.json | 14 ++--- data/vtlib/bwa_mem_alignment.vtf.prev.json | 57 ------------------- .../realignment_wtsi_stage2_template.json | 21 ++++--- 4 files changed, 20 insertions(+), 78 deletions(-) 
delete mode 100644 data/vtlib/bwa_mem_alignment.vtf.prev.json diff --git a/data/vtlib/README.vtlib b/data/vtlib/README.vtlib index 1bce66877..9c62b00d4 100644 --- a/data/vtlib/README.vtlib +++ b/data/vtlib/README.vtlib @@ -37,13 +37,13 @@ $ viv.pl -x -s -o v 3 -o viv_run.log aln.json Examples: bwa aln: -$ vtfp.pl -l was2_bwa_aln.vtf.log -o was2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/wtsi_alignment_stage2_template.json +$ vtfp.pl -l was2_bwa_aln.vtf.log -o was2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json bwa mem: -$ vtfp.pl -l 
was2_bwa_mem.vtf.log -o was2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/wtsi_alignment_stage2_template.json +$ vtfp.pl -l was2_bwa_mem.vtf.log -o was2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json tophat2: -$ vtfp.pl -l was2_tophat2.vtf.log -o was2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name 
-vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/wtsi_alignment_stage2_template.json +$ vtfp.pl -l was2_tophat2.vtf.log -o was2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 17928d88c..a418fee5b 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -126,13 +126,13 @@ { "id":"initial_phix_aln_bam", "type":"RAFILE", - "name":{"subst_param_name":"int_clipped_adapters_bam", + "name":{"subst_param_name":"initial_phix_aln_bam", "required":"yes", "subst_constructor":{ "vals":[ {"subst_param_name":"tmpdir","required":"no","default":"."}, "/", - {"subst_param_name":"int_clipped_adapters_bam_name", + 
{"subst_param_name":"initial_phix_aln_bam_name", "required":"no", "default":"initial_phix_aln.bam", "subst_constructor":{ @@ -150,15 +150,15 @@ } }, { - "id":"pre_auxmerge", + "id":"auxmerge_prep", "type":"VTFILE", - "name":{"subst_param_name":"pre_auxmerge", + "name":{"subst_param_name":"auxmerge_prep", "required":"yes", "subst_constructor":{ "vals":[ {"subst_param_name":"cfgdatadir","required":"no","default":"."}, "/", - {"subst_param_name":"pre_auxmerge_name","required":"no","default":"pre_auxmerge"}, + {"subst_param_name":"auxmerge_prep_name","required":"no","default":"auxmerge_prep"}, ".json" ], "postproc":{"op":"concat", "pad":""} @@ -460,11 +460,11 @@ { "id":"ipab_to_bamreset_pre_aln_flt", "from":"initial_phix_aln_bam", - "to":"pre_auxmerge" + "to":"auxmerge_prep" }, { "id":"bamreset_pre_aln_flt_to_", - "from":"pre_auxmerge", + "from":"auxmerge_prep", "to":"post_alignment_target:no_align_bam" }, { diff --git a/data/vtlib/bwa_mem_alignment.vtf.prev.json b/data/vtlib/bwa_mem_alignment.vtf.prev.json deleted file mode 100644 index 2826051d3..000000000 --- a/data/vtlib/bwa_mem_alignment.vtf.prev.json +++ /dev/null @@ -1,57 +0,0 @@ -{ -"description":"run bwa mem to to align input bam to supplied reference genome", -"subgraph_io":{ - "ports":{ - "inputs":{ - "_stdin_":"bamtofastq", - "reference":"bwa_mem:__DB_PREFIX_REFERENCE_GENOME__" - }, - "outputs":{ - "_stdout_":"samtobam" - } - } -}, -"nodes":[ - { - "id":"bamtofastq", - "type":"EXEC", - "cmd":["bamtofastq"] - }, - { - "id":"bwa_mem", - "type":"EXEC", - "cmd":{"subst_param_name":"bwa_mem_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bwa0_6", - "mem", - "-t", - {"subst_param_name":"aligner_numthreads","required":"no","default":"2"}, - "-p", - "__DB_PREFIX_REFERENCE_GENOME__", - "__FQ_IN__" - ], - "postproc":{"op":"pack","pad":" "} - } - } - }, - { - "id":"samtobam", - "type":"EXEC", - "cmd":"scramble -I sam -O bam" - } -], -"edges":[ - { - "id":"bamtofastq_to_int_fq", - "from":"bamtofastq", - 
"to":"bwa_mem:__FQ_IN__" - }, - { - "id":"bwa_mem_to_scramble", - "from":"bwa_mem", - "to":"samtobam" - } -] -} diff --git a/data/vtlib/realignment_wtsi_stage2_template.json b/data/vtlib/realignment_wtsi_stage2_template.json index fa75c89cd..5968efc79 100644 --- a/data/vtlib/realignment_wtsi_stage2_template.json +++ b/data/vtlib/realignment_wtsi_stage2_template.json @@ -59,7 +59,7 @@ "vals":[ {"subst_param_name":"reposdir","required":"no","default":"."}, "/", - {"subst_param_name":"alignment_refname","required":"yes"} + {"subst_param_name":"alignment_reference_genome_name","required":"yes"} ], "postproc":{"op":"concat", "pad":""} } @@ -92,7 +92,7 @@ "vals":[ {"subst_param_name":"reposdir","required":"no","default":"."}, "/", - {"subst_param_name":"picardrefdictname","required":"yes"} + {"subst_param_name":"picard_reference_dict_name","required":"yes"} ], "postproc":{"op":"concat", "pad":""} } @@ -109,8 +109,7 @@ {"subst_param_name":"cfgdatadir","required":"no","default":"."}, "/", {"subst_param_name":"post_alignment","required":"no","default":"post_alignment"}, - ".json" - ], + ".json" ], "postproc":{"op":"concat", "pad":""} } }, @@ -124,15 +123,15 @@ "cmd":"bamrecompress verbose=0 numthreads=2" }, { - "id":"no_aln_bam", + "id":"initial_phix_aln_bam", "type":"RAFILE", - "name":{"subst_param_name":"no_aln_bam", + "name":{"subst_param_name":"initial_phix_aln_bam", "required":"yes", "subst_constructor":{ "vals":[ {"subst_param_name":"tmpdir","required":"no","default":"."}, "/", - {"subst_param_name":"no_aln_bam_name", + {"subst_param_name":"initial_phix_aln_bam_name", "required":"no", "default":"no_aln.bam", "subst_constructor":{ @@ -175,7 +174,7 @@ "vals":[ {"subst_param_name":"reposdir","required":"no","default":"."}, "/", - {"subst_param_name":"refname_fasta","required":"yes"} + {"subst_param_name":"reference_genome_fasta_name","required":"yes"} ], "postproc":{"op":"concat", "pad":""} } @@ -192,7 +191,7 @@ "vals":[ 
{"subst_param_name":"cfgdatadir","required":"no","default":"."}, "/", - {"subst_param_name":"final_output_prep","required":"no","default":"final_output_prep"}, + {"subst_param_name":"final_output_prep_target_name","required":"no","default":"final_output_prep"}, ".json" ], "postproc":{"op":"concat", "pad":""} @@ -301,11 +300,11 @@ { "id":"brci_to_ipab", "from":"bamrecompress_input", - "to":"no_aln_bam" + "to":"initial_phix_aln_bam" }, { "id":"ipab_to_bamreset_pre_aln_flt", - "from":"no_aln_bam", + "from":"initial_phix_aln_bam", "to":"auxmerge_prep" }, { From aa73fc840929cdac290c28441f70c2cd7ed4ac17 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 13 Oct 2014 17:45:32 +0100 Subject: [PATCH 4/6] fixmates, bamstreamingmarkduplicates, adddupmarksupport, and calibration_pu added to *PRODUCTION* bwa_mem template --- data/bwamem_wtsi_stage2_template.vtf | 180 +++++++++++++-------------- 1 file changed, 90 insertions(+), 90 deletions(-) diff --git a/data/bwamem_wtsi_stage2_template.vtf b/data/bwamem_wtsi_stage2_template.vtf index e2060281f..74ad75842 100644 --- a/data/bwamem_wtsi_stage2_template.vtf +++ b/data/bwamem_wtsi_stage2_template.vtf @@ -2,46 +2,18 @@ "description":"Process DNA seq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc", "nodes":[ { - "id":"bmd_phix_tee0", + "id":"bmd_phix_multiway", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" + "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__" }, { - "id":"bmd_tee1", + "id":"bmd_target_multiway", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" - }, - { - "id":"bmd_tee2", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" - }, - { - "id":"bmd_tee3", - "type":"EXEC", - "use_STDIN": 
true, - "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" - }, - { - "id":"bmd_phix_tee1", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" - }, - { - "id":"bmd_phix_tee2", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": false, - "cmd":"teepot -w 300 __OUT1__ __OUT2__" + "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__" }, { "id":"scramble", @@ -79,6 +51,66 @@ "use_STDOUT": true, "cmd":"scramble -I bam -O cram -r __REFERENCE_GENOME__" }, + { + "id":"calibration_pu_target", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":{"subst_param_name":"calibration_pu_target_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"calibration_pu_executable","required":"no","default":"calibration_pu"}, + "-p", + {"subst_param_name":"calibration_pu_target_prefix", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"bam_ext","required":"no","default":".bam"} + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "-filter-bad-tiles",{"subst_param_name":"calibration_pu_bad_tiles_count","required":"no","default":"2"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } + }, + { + "id":"calibration_pu_phix", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":{"subst_param_name":"calibration_pu_phix_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"calibration_pu_executable","required":"no","default":"calibration_pu"}, + "-p", + {"subst_param_name":"calibration_pu_phix_prefix", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + 
{"subst_param_name":"phix_bam_ext","required":"no","default":"_phix.bam"} + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "-filter-bad-tiles",{"subst_param_name":"calibration_pu_bad_tiles_count","required":"no","default":"2"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } + }, { "id":"bamcheck_phix", "type":"EXEC", @@ -453,9 +485,6 @@ "use_STDOUT": true, "cmd":"bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 __PREALN_BAM__" }, -##################################### -# alignment filter and metrics output -##################################### { "id":"alignment_filter", "type":"EXEC", @@ -511,15 +540,12 @@ } } }, -####################### -# phiX final processing -####################### { "id":"bamsort_coord_phix", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamsort SO=coordinate level=0" + "cmd":"bamsort level=0 verbose=0 SO=coordinate fixmates=1 adddupmarksupport=1" }, { "id":"bammarkduplicates_phix", @@ -530,8 +556,9 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bammarkduplicates ", + "bamstreamingmarkduplicates ", "level=0 ", + "verbose=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", @@ -584,15 +611,12 @@ } } }, -############################# -# target BAM final processing -############################# { "id":"bamsort_coord_target", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamsort SO=coordinate level=0" + "cmd":"bamsort level=0 verbose=0 SO=coordinate fixmates=1 adddupmarksupport=1" }, { "id":"bammarkduplicates_target", @@ -603,8 +627,9 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bammarkduplicates ", + "bamstreamingmarkduplicates ", "level=0 ", + "verbose=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", @@ -656,9 +681,6 @@ } } }, -################################################## -# check input primary/sequence data matches output -################################################## { 
"id":"bamcat_output", "type":"EXEC", @@ -703,61 +725,41 @@ { "id":"bammarkduplicates_phix_to_tee", "from":"bammarkduplicates_phix", - "to":"bmd_phix_tee0" - }, - { - "id":"bmd_phix_tee0_to_bmd_phix_tee0", - "from":"bmd_phix_tee0:__OUT2__", - "to":"bmd_phix_tee1" + "to":"bmd_phix_multiway" }, { "id":"bammarkduplicates_to_tee", "from":"bammarkduplicates_target", - "to":"bmd_tee1" + "to":"bmd_target_multiway" }, { "id":"tee_to_scramble_phix", - "from":"bmd_phix_tee1:__OUT2__", + "from":"bmd_phix_multiway:__SCRAMBLE_OUT__", "to":"scramble_phix" }, { "id":"tee_to_scramble", - "from":"bmd_tee1:__OUT2__", + "from":"bmd_target_multiway:__SCRAMBLE_OUT__", "to":"scramble" }, - { - "id":"bmd_phix_tee1_to_bmd_phix_tee2", - "from":"bmd_phix_tee1:__OUT1__", - "to":"bmd_phix_tee2" - }, - { - "id":"bmd_tee1_to_bmd_tee2", - "from":"bmd_tee1:__OUT1__", - "to":"bmd_tee2" - }, { "id":"tee_phix_to_bamcheck_phix", - "from":"bmd_phix_tee2:__OUT1__", + "from":"bmd_phix_multiway:__BAMCHECK_OUT__", "to":"bamcheck_phix" }, { "id":"tee_to_bamcheck", - "from":"bmd_tee2:__OUT2__", + "from":"bmd_target_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, - { - "id":"bmd_tee2_to_bmd_tee3", - "from":"bmd_tee2:__OUT1__", - "to":"bmd_tee3" - }, { "id":"bmd_tee2_phix_to_flagstat", - "from":"bmd_phix_tee2:__OUT2__", + "from":"bmd_phix_multiway:__FLAGSTAT_OUT__", "to":"flagstat_phix" }, { "id":"bmd_tee3_to_flagstat_filter", - "from":"bmd_tee3:__OUT2__", + "from":"bmd_target_multiway:__FLAGSTAT_OUT__", "to":"flagstat_filter" }, { @@ -765,6 +767,16 @@ "from":"flagstat_filter", "to":"flagstat" }, + { + "id":"bmd_mw_phix_to_calibration_pu", + "from":"bmd_phix_multiway:__CALIBRATION_PU_OUT__", + "to":"calibration_pu_phix" + }, + { + "id":"bmd_mw_to_calibration_pu", + "from":"bmd_target_multiway:__CALIBRATION_PU_OUT__", + "to":"calibration_pu_target" + }, { "id":"scramble_to_phix_cram", "from":"scramble_phix", @@ -918,9 +930,6 @@ { "id":"bam12auxmerge_to_alignment_filter", "from":"bam12auxmerge", 
-################# -# AlignmentFilter -################# "to":"alignment_filter:__TARGET_INBAM__" }, { @@ -933,9 +942,6 @@ "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, -##################### -# phiX BAM production -##################### { "id":"alignmentfilter_to_bamsort_coord_phix", "from":"alignment_filter:__PHIX_OUTBAM__", @@ -948,7 +954,7 @@ }, { "id":"bmd_phix_tee0_to_bamrecompress_phix", - "from":"bmd_phix_tee0:__OUT1__", + "from":"bmd_phix_multiway:__BAM_OUT__", "to":"bamrecompress_phix" }, { @@ -956,9 +962,6 @@ "from":"bamrecompress_phix", "to":"phix_bam" }, -####################### -# Target BAM production -####################### { "id":"alignmentfilter_to_bamsort_coord_target", "from":"alignment_filter", @@ -971,7 +974,7 @@ }, { "id":"bmd_to_bamrecompress", - "from":"bmd_tee3:__OUT1__", + "from":"bmd_target_multiway:__BAM_OUT__", "to":"bamrecompress_target" }, { @@ -979,9 +982,6 @@ "from":"bamrecompress_target", "to":"target_bam" }, -################################################## -# check input primary/sequence data matches output -################################################## { "id":"phix_bam_to_bamcat", "from":"phix_bam", From 47314640ae920c7852bbdc5a1733f2c42ef3bc70 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 14 Oct 2014 13:20:37 +0100 Subject: [PATCH 5/6] added fixmates, calibrartion_pu, adddupmarksupport and bamstreamingmarkduplicates to current production tophat2 template --- data/tophat2_wtsi_stage2_template.vtf | 187 +++++++++++++------------- 1 file changed, 91 insertions(+), 96 deletions(-) diff --git a/data/tophat2_wtsi_stage2_template.vtf b/data/tophat2_wtsi_stage2_template.vtf index bd47e1fdf..d175f3065 100644 --- a/data/tophat2_wtsi_stage2_template.vtf +++ b/data/tophat2_wtsi_stage2_template.vtf @@ -2,46 +2,18 @@ "description":"Process RNASeq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing 
sequence etc", "nodes":[ { - "id":"bmd_phix_tee0", + "id":"bmd_phix_multiway", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __pFIFO0__" - }, - { - "id":"bmd_tee1", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __FIFO1__" - }, - { - "id":"bmd_tee2", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __FIFO2__" - }, - { - "id":"bmd_tee3", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __FIFO3__" - }, - { - "id":"bmd_phix_tee1", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __pFIFO1__" + "use_STDOUT": false, + "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__" }, { - "id":"bmd_phix_tee2", + "id":"bmd_target_multiway", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __pFIFO2__" + "use_STDOUT": false, + "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__" }, { "id":"scramble", @@ -79,6 +51,66 @@ "use_STDOUT": true, "cmd":"scramble -I bam -O cram -r __REFERENCE_GENOME__" }, + { + "id":"calibration_pu_target", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":{"subst_param_name":"calibration_pu_target_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"calibration_pu_executable","required":"no","default":"calibration_pu"}, + "-p", + {"subst_param_name":"calibration_pu_target_prefix", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"bam_ext","required":"no","default":".bam"} + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "-filter-bad-tiles",{"subst_param_name":"calibration_pu_bad_tiles_count","required":"no","default":"2"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } + }, + { + 
"id":"calibration_pu_phix", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":{"subst_param_name":"calibration_pu_phix_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"calibration_pu_executable","required":"no","default":"calibration_pu"}, + "-p", + {"subst_param_name":"calibration_pu_phix_prefix", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"phix_bam_ext","required":"no","default":"_phix.bam"} + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "-filter-bad-tiles",{"subst_param_name":"calibration_pu_bad_tiles_count","required":"no","default":"2"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } + }, { "id":"bamcheck_phix", "type":"EXEC", @@ -665,7 +697,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamsort SO=queryname level=0" + "cmd":"bamsort level=0 verbose=0 SO=queryname" }, { "id":"bamreset_prealn", @@ -689,9 +721,6 @@ "use_STDOUT": true, "cmd":"bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 __PREALN_BAM__" }, -##################################### -# alignment filter and metrics output -##################################### { "id":"alignment_filter", "type":"EXEC", @@ -747,15 +776,12 @@ } } }, -####################### -# phiX final processing -####################### { "id":"bamsort_coord_phix", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamsort SO=coordinate level=0" + "cmd":"bamsort level=0 verbose=0 SO=coordinate fixmates=1 adddupmarksupport=1" }, { "id":"bammarkduplicates_phix", @@ -766,7 +792,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bammarkduplicates ", + "bamstreamingmarkduplicates ", "level=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, @@ -820,15 +846,12 @@ } } }, -############################# -# target BAM final processing 
-############################# { "id":"bamsort_coord_target", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamsort SO=coordinate level=0" + "cmd":"bamsort level=0 verbose=0 SO=coordinate fixmates=1 adddupmarksupport=1" }, { "id":"bammarkduplicates_target", @@ -839,7 +862,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bammarkduplicates ", + "bamstreamingmarkduplicates ", "level=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, @@ -892,9 +915,6 @@ } } }, -################################################## -# check input primary/sequence data matches output -################################################## { "id":"bamcat_output", "type":"EXEC", @@ -939,61 +959,41 @@ { "id":"bammarkduplicates_phix_to_tee", "from":"bammarkduplicates_phix", - "to":"bmd_phix_tee0" - }, - { - "id":"bmd_phix_tee0_to_bmd_phix_tee0", - "from":"bmd_phix_tee0:__pFIFO0__", - "to":"bmd_phix_tee1" + "to":"bmd_phix_multiway" }, { "id":"bammarkduplicates_to_tee", "from":"bammarkduplicates_target", - "to":"bmd_tee1" + "to":"bmd_target_multiway" }, { "id":"tee_to_scramble_phix", - "from":"bmd_phix_tee1:__pFIFO1__", + "from":"bmd_phix_multiway:__SCRAMBLE_OUT__", "to":"scramble_phix" }, { "id":"tee_to_scramble", - "from":"bmd_tee1:__FIFO1__", + "from":"bmd_target_multiway:__SCRAMBLE_OUT__", "to":"scramble" }, - { - "id":"bmd_phix_tee1_to_bmd_phix_tee2", - "from":"bmd_phix_tee1", - "to":"bmd_phix_tee2" - }, - { - "id":"bmd_tee1_to_bmd_tee2", - "from":"bmd_tee1", - "to":"bmd_tee2" - }, { "id":"tee_phix_to_bamcheck_phix", - "from":"bmd_phix_tee2", + "from":"bmd_phix_multiway:__BAMCHECK_OUT__", "to":"bamcheck_phix" }, { "id":"tee_to_bamcheck", - "from":"bmd_tee2:__FIFO2__", + "from":"bmd_target_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, - { - "id":"bmd_tee2_to_bmd_tee3", - "from":"bmd_tee2", - "to":"bmd_tee3" - }, { "id":"bmd_tee2_phix_to_flagstat", - "from":"bmd_phix_tee2:__pFIFO2__", + "from":"bmd_phix_multiway:__FLAGSTAT_OUT__", 
"to":"flagstat_phix" }, { "id":"bmd_tee3_to_flagstat_filter", - "from":"bmd_tee3:__FIFO3__", + "from":"bmd_target_multiway:__FLAGSTAT_OUT__", "to":"flagstat_filter" }, { @@ -1001,6 +1001,16 @@ "from":"flagstat_filter", "to":"flagstat" }, + { + "id":"bmd_mw_phix_to_calibration_pu", + "from":"bmd_phix_multiway:__CALIBRATION_PU_OUT__", + "to":"calibration_pu_phix" + }, + { + "id":"bmd_mw_to_calibration_pu", + "from":"bmd_target_multiway:__CALIBRATION_PU_OUT__", + "to":"calibration_pu_target" + }, { "id":"scramble_to_phix_cram", "from":"scramble_phix", @@ -1056,9 +1066,6 @@ "from":"bamrecompress_input", "to":"int_adp_bam" }, -############################################################ -# tophat alignment -############################################################ { "id":"ti_to_bamcollate2_ranking", "from":"tee_input:__FIFO__", @@ -1187,9 +1194,6 @@ { "id":"bam12auxmerge_to_alignment_filter", "from":"bam12auxmerge", -################# -# AlignmentFilter -################# "to":"alignment_filter:__TARGET_INBAM__" }, { @@ -1202,9 +1206,6 @@ "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, -##################### -# phiX BAM production -##################### { "id":"alignmentfilter_to_bamsort_coord_phix", "from":"alignment_filter:__PHIX_OUTBAM__", @@ -1217,7 +1218,7 @@ }, { "id":"bmd_phix_tee0_to_bamrecompress_phix", - "from":"bmd_phix_tee0", + "from":"bmd_phix_multiway:__BAM_OUT__", "to":"bamrecompress_phix" }, { @@ -1225,9 +1226,6 @@ "from":"bamrecompress_phix", "to":"phix_bam" }, -####################### -# Target BAM production -####################### { "id":"alignmentfilter_to_bamsort_coord_target", "from":"alignment_filter", @@ -1240,7 +1238,7 @@ }, { "id":"bmd_to_bamrecompress", - "from":"bmd_tee3", + "from":"bmd_target_multiway:__BAM_OUT__", "to":"bamrecompress_target" }, { @@ -1248,9 +1246,6 @@ "from":"bamrecompress_target", "to":"target_bam" }, -################################################## -# check input primary/sequence data matches output 
-################################################## { "id":"phix_bam_to_bamcat", "from":"phix_bam", From 7f5771408cb18e0e4a379fb559cc2835c1d6f2d5 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 14 Oct 2014 13:24:06 +0100 Subject: [PATCH 6/6] removed tophat2 realignment template - should now use the subgraph version (under vtlib) --- data/tophat2_wtsi_realign_template.vtf | 937 ------------------------- 1 file changed, 937 deletions(-) delete mode 100755 data/tophat2_wtsi_realign_template.vtf diff --git a/data/tophat2_wtsi_realign_template.vtf b/data/tophat2_wtsi_realign_template.vtf deleted file mode 100755 index 6fe260b3c..000000000 --- a/data/tophat2_wtsi_realign_template.vtf +++ /dev/null @@ -1,937 +0,0 @@ -{ - "description": "Process RNASeq data in BAM files producing WTSI DNAP Sequencing Informatics BAM output (no PhiX treatment): full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc", - "nodes": [ - { - "id": "bmd_tee1", - "type": "EXEC", - "cmd": "tee __FIFO1__" - }, - { - "id": "bmd_tee2", - "type": "EXEC", - "cmd": "tee __FIFO2__" - }, - { - "id": "bmd_tee3", - "type": "EXEC", - "cmd": "tee __FIFO3__" - }, - { - "id": "scramble", - "type": "EXEC", - "cmd": "scramble -I bam -O cram -r __REFERENCE_GENOME__" - }, - { - "id": "bamcheck", - "type": "EXEC", - "cmd": "bamcheck" - }, - { - "id": "flagstat", - "type": "EXEC", - "cmd": "samtools flagstat -" - }, - { - "id": "cram", - "type": "OUTFILE", - "name": { - "subst_param_name": "cram", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "outdatadir", - "required": "no", - "default": "." 
- }, - "/", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".cram" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "out_bamcheck", - "type": "OUTFILE", - "name": { - "subst_param_name": "out_bamcheck", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "outdatadir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".bamcheck" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "out_flagstat", - "type": "OUTFILE", - "name": { - "subst_param_name": "out_flagstat", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "outdatadir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".flagstat" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "reference_genome_fasta", - "type": "INFILE", - "name": { - "subst_param_name": "reference_genome_fasta", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "reposdir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "refname_fasta", - "required": "yes" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - }, - "description": "Prefix for reference fasta and Bowtie2 index files" - }, - { - "id": "reference_genome", - "type": "INFILE", - "name": { - "subst_param_name": "reference_genome", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "reposdir", - "required": "no", - "default": "." 
- }, - "/", - { - "subst_param_name": "refname", - "required": "yes" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - }, - "description": "Prefix for reference fasta and Bowtie2 index files" - }, - { - "id": "reference_dict", - "type": "INFILE", - "name": { - "subst_param_name": "reference_dict", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "reposdir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "refdictname", - "required": "yes" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - }, - "description": "Extra metadata e.g. UR, M5 auxtags for BAM SQ header records" - }, - { - "id": "src_bam", - "type": "INFILE", - "name": { - "subst_param_name": "src_bam", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "indatadir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".bam" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - }, - "description": "BAM using as input to this pipeline - expected to already contain PhiX (normally from hyb buffer spike-in) alignments" - }, - { - "id": "bamcollate2", - "type": "EXEC", - "cmd": [ - "bamcollate2", - "collate=1", - "level=0" - ], - "comment": "ensure BAM records are gathered by template i.e. queryname" - }, - { - "id": "tee_input", - "type": "EXEC", - "cmd": "tee __FIFO__" - }, - { - "id": "bamrecompress_input", - "type": "EXEC", - "cmd": "bamrecompress verbose=0 numthreads=2" - }, - { - "id": "int_adp_bam", - "type": "RAFILE", - "name": { - "subst_param_name": "int_clipped_adapters_bam", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "tmpdir", - "required": "no", - "default": "." 
- }, - "/", - { - "subst_param_name": "int_clipped_adapters_bam_name", - "required": "no", - "default": "int_adp.bam", - "subst_constructor": { - "vals": [ - "int_adp_", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".bam" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "bamcollate2_ranking", - "type": "EXEC", - "cmd": "bamcollate2 collate=3 level=0", - "comment": "already collated suitably - just here to do the ranking in the name" - }, - { - "id": "bamreset_tophat", - "type": "EXEC", - "cmd": "bamreset resetaux=1 level=0 verbose=0", - "comment": "Alignment removal also required for bamadapterclip (at least 0.0.142)" - }, - { - "id": "bamadapterfind_and_clip", - "type": "EXEC", - "cmd": "bamadapterfind clip=1 verbose=0 level=0", - "description": "Find and hard clip adapter sequence from reads before feeding to Tophat2" - }, - { - "id": "bamtofastq", - "type": "EXEC", - "cmd": [ - "bamtofastq", - "gz=1", - "F=__FQOUT1__", - "F2=__FQOUT2__" - ] - }, - { - "id": "fq1", - "type": "RAFILE", - "name": { - "subst_param_name": "fastq1", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "tmpdir", - "required": "no", - "default": "." - }, - "/", - { - "subst_param_name": "fastq1_name", - "required": "no", - "default": "intfile_1.fq.gz", - "subst_constructor": { - "vals": [ - "intfile_1_", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".fq.gz" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "fq2", - "type": "RAFILE", - "name": { - "subst_param_name": "fastq2", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "tmpdir", - "required": "no", - "default": "." 
- }, - "/", - { - "subst_param_name": "fastq2_name", - "required": "no", - "default": "intfile_2.fq.gz", - "subst_constructor": { - "vals": [ - "intfile_2_", - { - "subst_param_name": "rpt", - "required": "yes" - }, - ".fq.gz" - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, - { - "id": "tophat2", - "type": "EXEC", - "cmd": { - "subst_param_name": "tophat2_cmd", - "required": "yes", - "subst_constructor": { - "vals": [ - "tophat2", - "--keep-fasta-order", - "--no-sort-bam", - "--output-dir", - { - "subst_param_name": "tophat_out", - "required": "no", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "tophat_dir", - "required": "no", - "default": "tophat_out" - }, - "_", - { - "subst_param_name": "rpt", - "required": "yes" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - }, - "default": "tophat_out" - }, - "--mate-inner-dist","100", - "--num-threads",{"subst_param_name":"num_threads","required":"no","default":"12"}, - "--library-type", {"subst_param_name":"library_type","required":"no","default":"fr-firststrand"}, - "--no-coverage-search", - "--microexon-search", - { - "subst_param_name": "transcriptome_flag_val", - "required": "no", - "subst_constructor": { - "vals": [ - "--transcriptome-index", - { - "subst_param_name": "transcriptome_val", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "reposdir", - "required": "no" - }, - "/transcriptomes/", - { - "subst_param_name": "transcriptome_subpath", - "required": "no" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - ], - "postproc": { - "op": "concat", - "pad": "=" - } - } - }, - "__REFERENCE_GENOME__", - "__FQIN1__", - "__FQIN2__" - ], - "postproc": { - "op": "pack", - "pad": " " - } - } - } - }, - { - "id": "accepted_hits_bam", - "type": "RAFILE", - "subtype": "DUMMY", - "name": { - "subst_param_name": "accepted_hits_bam", - "required": "no", - "subst_constructor": { - "vals": 
[ - { - "subst_param_name": "tophat_dir", - "required": "no", - "default": "tophat_out" - }, - "_", - { - "subst_param_name": "rpt", - "required": "yes" - }, - "/", - { - "subst_param_name": "accepted_hits", - "required": "no", - "default": "accepted_hits.bam" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - }, - "default": "tophat_out/accepted_hits.bam" - } - }, - { - "id": "unmapped_bam", - "type": "RAFILE", - "subtype": "DUMMY", - "name": { - "subst_param_name": "unmapped_bam", - "required": "no", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "tophat_dir", - "required": "no", - "default": "tophat_out" - }, - "_", - { - "subst_param_name": "rpt", - "required": "yes" - }, - "/", - { - "subst_param_name": "unmapped", - "required": "no", - "default": "unmapped.bam" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - }, - "default": "tophat_out/unmapped.bam" - } - }, - { - "id": "bamcat", - "type": "EXEC", - "cmd": "bamcat I=__IN_BAM1__ I=__IN_BAM2__ level=0" - }, - { - "id": "tee_headerSQfix", - "type": "EXEC", - "cmd": "mbuffer -f -q -m 5M -o - -o __OUT1__", - "comment": "get deadlock when tee used here" - }, - { - "id": "sam_headerSQfix", - "type": "EXEC", - "cmd": "samtools view -h -" - }, - { - "id": "alterSQ_headerSQfix", - "type": "EXEC", - "cmd": "perl -nle 'use strict; use autodie; our%sq; our$re; our$body; BEGIN{$body=0; $re=qr/^\\@SQ.*\\tSN:([^\\t]+)/; open(my$df,q(<),shift@ARGV); while(<$df>){chomp; if(/$re/){$sq{$1}=$_;} } close $df; } next if $body ; if(/$re/){$_=$sq{$1}||$_}elsif(/^[^@]/){open STDOUT,q(>),q(/dev/null); $body=1; next} print' __IN_DICT__", - "comment": "careful to not send SIGPIPE back to tee, yet ensure EOF to reheader as soon as header processed", - "description": "where SN field in SQ header record matches one in the given dict file, replace that SQ record with that in the dict file" - }, - { - "id": "mbuffer_headerSQfix", - "type": "EXEC", - "cmd": "mbuffer -f -q -m 5M" - }, - { - "id": 
"reheader_headerSQfix", - "type": "EXEC", - "cmd": "samtools reheader __IN_SAMHEADER__ __IN_BAM__" - }, - { - "id": "bam12split_tophat", - "type": "EXEC", - "cmd": "bam12split verbose=0 level=0" - }, - { - "id": "bamsort_qname", - "type": "EXEC", - "cmd": "bamsort SO=queryname level=0" - }, - { - "id": "bamreset_prealn", - "type": "EXEC", - "cmd": "bamreset resetaux=0 auxfilter=RG,PG,BC,RT,QT,tr,tq,br,qr level=0 verbose=0", - "comment": "bam12auxmerge <= 0.0.142 requires SQ headers removed. Alignment removal also required for bamadapterclip (at least 0.0.142)" - }, - { - "id": "bamadapterfind_and_clip_prealn", - "type": "EXEC", - "cmd": "bamadapterfind clip=1 verbose=0 level=0" - }, - { - "id": "bam12auxmerge", - "type": "EXEC", - "cmd": "bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 __PREALN_BAM__" - }, -############################# -# target BAM final processing -############################# - { - "id": "bamsort_coord_target", - "type": "EXEC", - "cmd": "bamsort SO=coordinate level=0" - }, - { - "id": "bammarkduplicates_target", - "type": "EXEC", - "cmd": "bammarkduplicates M=markdups_metrics.txt level=0" - }, - { - "id": "bamrecompress_target", - "type": "EXEC", - "cmd": "bamrecompress numthreads=2" - }, - { - "id": "target_bam", - "type": "OUTFILE", - "name": { - "subst_param_name": "target_bam", - "required": "yes", - "subst_constructor": { - "vals": [ - { - "subst_param_name": "outdatadir", - "required": "no", - "default": "." 
- }, - "/", - { - "subst_param_name": "rpt", - "required": "yes" - }, - "_", - { - "subst_param_name": "bam_name", - "required": "no", - "default": "aln.bam" - } - ], - "postproc": { - "op": "concat", - "pad": "" - } - } - } - }, -################################################## -# check input primary/sequence data matches output -################################################## - { - "id": "bamcat_output", - "type": "EXEC", - "cmd": "bamcat verbose=0 level=0 __TARGETBAM__" - }, - { - "id": "seqchksum_output", - "type": "EXEC", - "cmd": "bamseqchksum" - }, - { - "id": "seqchksum_input", - "type": "EXEC", - "cmd": "bamseqchksum" - }, - { - "id": "cmp_seqchksum", - "type": "EXEC", - "cmd": "cmp -s __INPUTCHK__ __OUTPUTCHK__", - "description": "check input primary/sequence data matches output" - } - ], - "edges": [ - { - "id": "ref_to_scramble", - "from": "reference_genome_fasta", - "to": "scramble:__REFERENCE_GENOME__" - }, - { - "id": "bammarkduplicates_to_tee", - "from": "bammarkduplicates_target", - "to": "bmd_tee1" - }, - { - "id": "tee_to_scramble", - "from": "bmd_tee1:__FIFO1__", - "to": "scramble" - }, - { - "id": "bmd_tee1_to_bmd_tee2", - "from": "bmd_tee1", - "to": "bmd_tee2" - }, - { - "id": "tee_to_bamcheck", - "from": "bmd_tee2:__FIFO2__", - "to": "bamcheck" - }, - { - "id": "bmd_tee2_to_bmd_tee3", - "from": "bmd_tee2", - "to": "bmd_tee3" - }, - { - "id": "bmd_tee3_to_flagstat", - "from": "bmd_tee3:__FIFO3__", - "to": "flagstat" - }, - { - "id": "scramble_to_cram", - "from": "scramble", - "to": "cram" - }, - { - "id": "bamcheck_to_out", - "from": "bamcheck", - "to": "out_bamcheck" - }, - { - "id": "flagstat_to_out", - "from": "flagstat", - "to": "out_flagstat" - }, - { - "id": "ref_to_tophat2", - "from": "reference_genome", - "to": "tophat2:__REFERENCE_GENOME__" - }, - { - "id": "src_to_bc2", - "from": "src_bam", - "to": "bamcollate2" - }, - { - "id": "bc2_to_ti", - "from": "bamcollate2", - "to": "tee_input" - }, - { - "id": "ti_to_brc", - "from": 
"tee_input", - "to": "bamrecompress_input" - }, - { - "id": "brc_to_int_adp_bam", - "from": "bamrecompress_input", - "to": "int_adp_bam" - }, -############################################################ -# tophat alignment -############################################################ - { - "id": "ti_to_bamcollate2_ranking", - "from": "tee_input:__FIFO__", - "to": "bamcollate2_ranking" - }, - { - "id": "bamcollate2_ranking_to_bamreset_tophat", - "from": "bamcollate2_ranking", - "to": "bamreset_tophat" - }, - { - "id": "bamreset_tophat_to_bamtofastq", - "from": "bamreset_tophat", - "to": "bamadapterfind_and_clip" - }, - { - "id": "bamadapterclip_to_bamtofastq", - "from": "bamadapterfind_and_clip", - "to": "bamtofastq" - }, - { - "id": "bamtofastq_to_fq1", - "from": "bamtofastq:__FQOUT1__", - "to": "fq1" - }, - { - "id": "bamtofastq_to_fq2", - "from": "bamtofastq:__FQOUT2__", - "to": "fq2" - }, - { - "id": "fq1_to_tophat2", - "from": "fq1", - "to": "tophat2:__FQIN1__" - }, - { - "id": "fq2_to_tophat2", - "from": "fq2", - "to": "tophat2:__FQIN2__" - }, - { - "id": "tophat2_to_accepted_hits_bam", - "from": "tophat2", - "to": "accepted_hits_bam" - }, - { - "id": "tophat2_to_unmapped_bam", - "from": "tophat2", - "to": "unmapped_bam" - }, - { - "id": "accepted_hits_bam_to_bamcat", - "from": "accepted_hits_bam", - "to": "bamcat:__IN_BAM1__" - }, - { - "id": "unmapped_bam_to_bamcat", - "from": "unmapped_bam", - "to": "bamcat:__IN_BAM2__" - }, - { - "id": "bamcat_to_tee_headerSQfix", - "from": "bamcat", - "to": "tee_headerSQfix" - }, - { - "id": "tee_headerSQfix_to_sam", - "from": "tee_headerSQfix:__OUT1__", - "to": "sam_headerSQfix" - }, - { - "id": "reference_dict_to_sam_headerSQfix", - "from": "reference_dict", - "to": "alterSQ_headerSQfix:__IN_DICT__" - }, - { - "id": "sam_headerSQfix_to_alterSQ", - "from": "sam_headerSQfix", - "to": "alterSQ_headerSQfix" - }, - { - "id": "alterSQ_headerSQfix_to_reheader", - "from": "alterSQ_headerSQfix", - "to": 
"reheader_headerSQfix:__IN_SAMHEADER__" - }, - { - "id": "tee_headerSQfix_to_mbuffer", - "from": "tee_headerSQfix", - "to": "mbuffer_headerSQfix" - }, - { - "id": "mbuffer_headerSQfix_to_reheader", - "from": "mbuffer_headerSQfix", - "to": "reheader_headerSQfix:__IN_BAM__" - }, - { - "id": "reheader_headerSQfix_to_bam12split_tophat", - "from": "reheader_headerSQfix", - "to": "bam12split_tophat" - }, - { - "id": "bam12split_tophat_to_bamsort_qname", - "from": "bam12split_tophat", - "to": "bamsort_qname" - }, - { - "id": "bam_to_bamreset_prealn", - "from": "int_adp_bam", - "to": "bamreset_prealn" - }, - { - "id": "bamreset_prealn_to_bamadapterclip_prealn", - "from": "bamreset_prealn", - "to": "bamadapterfind_and_clip_prealn" - }, - { - "id": "bamadapterclip_prealn_to_bam12auxmerge", - "from": "bamadapterfind_and_clip_prealn", - "to": "bam12auxmerge:__PREALN_BAM__" - }, - { - "id": "bsqn_to_bam12auxmerge", - "from": "bamsort_qname", - "to": "bam12auxmerge" - }, - { - "id": "bam12auxmerge_to_bamsort_coord_target", - "from": "bam12auxmerge", - "to": "bamsort_coord_target" - }, -####################### -# Target BAM production -####################### - { - "id": "bamsort_phix_to_bammarkduplicates_target", - "from": "bamsort_coord_target", - "to": "bammarkduplicates_target" - }, - { - "id": "bmd_to_bamrecompress", - "from": "bmd_tee3", - "to": "bamrecompress_target" - }, - { - "id": "final_stretch", - "from": "bamrecompress_target", - "to": "target_bam" - }, -################################################## -# check input primary/sequence data matches output -################################################## - { - "id": "target_bam_to_bamcat", - "from": "target_bam", - "to": "bamcat_output:__TARGETBAM__" - }, - { - "id": "bamcat_to_chk_output", - "from": "bamcat_output", - "to": "seqchksum_output" - }, - { - "id": "src_bam_to_chk_input", - "from": "src_bam", - "to": "seqchksum_input" - }, - { - "id": "seqchksum_output_to_cmp", - "from": "seqchksum_output", - "to": 
"cmp_seqchksum:__OUTPUTCHK__" - }, - { - "id": "seqchksum_input_to_cmp", - "from": "seqchksum_input", - "to": "cmp_seqchksum:__INPUTCHK__" - } - ] -}