From 2a8c8259616125c5e9a8ddf23bf3b45eca4fb49e Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Sat, 8 May 2021 00:58:20 +0100 Subject: [PATCH] change bam file, bam index and bam md5 from bamrecompress (biobambam2) to samtools and md5sum --- data/vtlib/alignment_common.json | 1 + data/vtlib/final_output_prep.json | 96 +++++++++++++++++++------------ 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/data/vtlib/alignment_common.json b/data/vtlib/alignment_common.json index 913a48998..d31d3218c 100644 --- a/data/vtlib/alignment_common.json +++ b/data/vtlib/alignment_common.json @@ -15,6 +15,7 @@ {"id":"bwa_executable","required":"no","default":"bwa0_6"}, {"id":"aligner_numthreads","required":"no","default":"2"}, {"id":"bam_ext","required":"no","default":".bam"}, + {"id":"bam_idx_ext","required":"no","default":".bai"}, {"id":"cram_ext","required":"no","default":".cram"}, {"id":"cram_idx_ext","required":"no","default":".cram.crai"}, {"id":"alignment_method","required":"yes"}, diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index a65aeca60..d524762dc 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -104,45 +104,21 @@ "postproc":{"op":"concat","pad":"="} } }, + {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, + {"id":"fopid","required":"yes"}, { - "id":"br_indexfile_flag", - "required":"no", - "subst_constructor":{ - "vals":[ "indexfilename=", {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bai" ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_md5file_flag", - "required":"no", - "subst_constructor":{ - "vals":[ "md5filename=", {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bam.md5" ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_numthreads_flag", - "required":"no", - "subst_constructor":{ - "vals":[ "numthreads=", {"subst":"br_numthreads_val"} ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_tmpfile_flag", - "required":"no", + "id":"bam_file", + "required":"yes", "subst_constructor":{ - "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"brtmp"}, "_", {"subst":"fopid"}, ".tmp" ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_ext"} ], "postproc":{"op":"concat", "pad":""} } }, - {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, - {"id":"fopid","required":"yes"}, { - "id":"bam_file", + "id":"bam_index_file", "required":"yes", "subst_constructor":{ - "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_ext"} ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_idx_ext"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -160,6 +136,13 @@ "postproc":{"op":"concat", "pad":""} } }, + { + "id":"bam_md5", + "subst_constructor":{ + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bam.md5" ], + "postproc":{"op":"concat", "pad":""} + } + }, { "id":"cram_md5", "subst_constructor":{ @@ -293,6 +276,7 @@ {"port":"flagstat", "direction":"out"}, {"port":"calibration_pu", "direction":"out"}, {"port":"bam", "direction":"out"}, + {"port":"md5", "direction":"out"}, {"port":"samtools_stats_F0x900", "direction":"out"}, {"port":"samtools_stats_F0xB00", "direction":"out"}, {"port":"bam_stats", "direction":"out"}, @@ -342,6 +326,13 @@ ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, + { + "id":"create_bam_md5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"md5sum" + }, { "id":"scramble_md5", "type":"EXEC", @@ -357,6 +348,14 @@ "cmd":[ "tr", "-d", " \\-\n" ], "comment":"the double-backslash is required to get the correct character set to the tr command" }, + { + "id":"postprocess_bam_md5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":[ "tr", "-d", " \\-\n" ], + "comment":"the double-backslash is required to get the correct character set to the tr command" + }, { "id":"cram_index", "type":"EXEC", @@ -436,14 +435,34 @@ "cmd": [ {"subst":"calibration_pu_executable"}, "-p", {"subst":"calibration_pu_prefix"}, "-filter-bad-tiles", {"subst":"calibration_pu_bad_tiles_count"}, "-" ] }, { - "id":"bamrecompress", + "id":"diskbam", + "comment":"convert compression level 0 bam stream to bam file", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ "bamrecompress", "verbose=0", "index=1", {"subst":"br_indexfile_flag"}, "md5=1", {"subst":"br_md5file_flag"}, {"subst":"br_numthreads_flag"}, {"subst":"br_tmpfile_flag"} ] + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "view", "-b", + "--threads", {"subst":"diskbam_threads","required":true,"ifnull":4}, + {"subst":"diskbam_extra_flags", "required":false}, + "-" + ] + }, + { + "id":"bam_index", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": false, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "index", + {"subst":"diskbam_extra_flags", "required":false}, + {"port":"bam", "direction":"in"}, + {"port":"idx", "direction":"out"} + ] }, - { "id":"bam_file", "type":"OUTFILE", "name":{"subst":"bam_file"} }, + { "id":"bam_file", "type":"RAFILE", "name":{"subst":"bam_file"} }, + { "id":"bam_index_file", "type":"OUTFILE", "name":{"subst":"bam_index_file"} }, { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, + { "id":"bam_md5", "type":"OUTFILE", "name":{"subst":"bam_md5"} }, { "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} }, { "id":"seqchksum_file", "type":"RAFILE", "name":{"subst":"seqchksum_file"} }, { "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, @@ -502,8 +521,13 @@ { "id":"bmdmw_to_seqchksum_extrahash", "from":"bmd_multiway:seqchksum_extrahash", "to":"seqchksum_extrahash" }, { "id":"bmdmw_to_flagstat", "from":"bmd_multiway:flagstat", "to":"flagstat_filter" }, { "id":"flagstat_filter_to_flagstat", "from":"flagstat_filter", "to":"flagstat" }, - { "id":"bmd_to_bam", "from":"bmd_multiway:bam", "to":"bamrecompress" }, - { "id":"brc_to_bam", "from":"bamrecompress", "to":"bam_file" }, + { "id":"bmd_to_bam", "from":"bmd_multiway:bam", "to":"diskbam" }, + { "id":"stv_to_bam", "from":"diskbam", "to":"bam_file" }, + { "id":"bam_to_idx", "from":"bam_file", "to":"bam_index:bam" }, + { "id":"idx_to_file", "from":"bam_index:idx", "to":"bam_index_file" }, + { "id":"bmw_to_md5", "from":"bmd_multiway:md5", "to":"create_bam_md5" }, + { "id":"md5_to_bam_postprocess", "from":"create_bam_md5", "to":"postprocess_bam_md5" }, + { "id":"corrected_bam_md5_out", "from":"postprocess_bam_md5", "to":"bam_md5" }, { "id":"tee_to_cram", "from":"scramble_tee:cram", "to":"cram_file" }, { "id":"corrected_md5_out", "from":"postprocess_md5", "to":"cram_md5" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_file" },