From b33526dba5acc91fce41325b6a6cc7376dc1df0e Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 4 Aug 2015 16:54:53 +0100 Subject: [PATCH 01/40] Add extra split template - remove human and another genome --- ...lit_extrasplit_notargetalign_template.json | 466 ++++++++++++++++++ data/vtlib/seqchksum_ehs.json | 70 +++ 2 files changed, 536 insertions(+) create mode 100644 data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json create mode 100644 data/vtlib/seqchksum_ehs.json diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json new file mode 100644 index 000000000..685cbf02f --- /dev/null +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json @@ -0,0 +1,466 @@ +{ +"version":"1.0", +"description":"Process DNA (or RNA) seq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output. 
This is the version that does human split, and another genome, when there is no target alignment", +"subst_params":[ + { + "id":"basic_pipeline_params_file", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", "alignment_common.json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id": "basic_pipeline_params", + "type":"SPFILE", + "name":{"subst":"basic_pipeline_params_file"}, + "required": "no", + "comment":"this will expand to a set of subst_param elements" + }, + { + "id":"src_bam", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"indatadir"}, "/", {"subst":"rpt"}, ".bam" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"pre_alignment_es_name","required":"no","default":"pre_alignment"}, + { + "id":"pre_alignment_es", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"pre_alignment_es_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"bwa_mem_T_value_es","required":"no","default":"30"}, + {"id":"alignment_es_method","required":"no","default":{"subst":"alignment_method"},"comment":"default to same aligner as for target"}, + { + "id":"alignment_es_vtf", + "comment":"it is possible to use a different aligner to split extra", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"alignment_es_method"}, "_alignment.json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"reference_dict_es", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"reposdir"}, "/", {"subst":"reference_dict_es_name"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"post_alignment_es_name","required":"no","default":"post_alignment"}, + { + "id":"post_alignment_es", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"post_alignment_es_name"}, ".json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + 
{"id":"pre_alignment_hs_name","required":"no","default":"pre_alignment"}, + { + "id":"pre_alignment_hs", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"pre_alignment_hs_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"bwa_mem_T_value_hs","required":"no","default":"30"}, + {"id":"alignment_hs_method","required":"no","default":{"subst":"alignment_method"},"comment":"default to same aligner as for target"}, + { + "id":"alignment_hs_vtf", + "comment":"it is possible to use a different aligner to split human", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"alignment_hs_method"}, "_alignment.json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"reference_dict_hs", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"reposdir"}, "/", {"subst":"reference_dict_hs_name"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"post_alignment_hs_name","required":"no","default":"post_alignment"}, + { + "id":"post_alignment_hs", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"post_alignment_hs_name"}, ".json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"initial_phix_aln_bam_name", + "required":"no", + "default":"initial_phix_aln.bam", + "subst_constructor":{ + "vals":[ "initial_phix_aln_", {"subst":"rpt"}, ".bam" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"initial_phix_aln_bam", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"initial_phix_aln_bam_name"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"ehs_intfile0_name", + "required":"no", + "default":"ehs_intfile0.bam", + "subst_constructor":{ + "vals":[ "ehs_intfile0_", {"subst":"rpt"}, ".bam" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"ehs_intfile0", + "required":"yes", + "subst_constructor":{ + "vals":[ 
{"subst":"tmpdir"}, "/", {"subst":"ehs_intfile0_name"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"auxmerge_prep_name","required":"no","default":"auxmerge_prep"}, + { + "id":"auxmerge_prep", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"auxmerge_prep_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"alignment_filter_jar_path","required":"no","default":"/software/npg/java_jars"}, + {"id":"java_cmd","required":"no","default":"java"}, + { + "id":"alignment_filter_jar","required":"no", + "subst_constructor":{ + "vals":[ {"subst":"alignment_filter_jar_path"}, "/", "AlignmentFilter.jar" ], + "postproc":{"op":"concat","pad":""} + } + }, + { + "id":"alignment_filter_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst":"java_cmd"}, + "-Xmx1000m", + "-jar", {"subst":"alignment_filter_jar"}, + "VALIDATION_STRINGENCY=SILENT", + "CREATE_MD5_FILE=false", + "VERBOSITY=INFO", + "QUIET=false", + "COMPRESSION_LEVEL=5", + "MAX_RECORDS_IN_RAM=500000", + "CREATE_INDEX=false" + ], + "postproc":{"op":"noconcat", "pad":" "} + } + }, + {"id":"af_metrics_name","required":"no","default":"alignment_filter_metrics.json"}, + { + "id":"af_metrics", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"logdir"}, "/", {"subst":"rpt"}, ".", {"subst":"af_metrics_name"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"target_indicator","default":""}, + {"id":"phix_indicator","default":"_phix"}, + {"id":"es_indicator","default":"_extra"}, + {"id":"hs_indicator","default":"_human"}, + { + "id":"fopid_tgt", + "comment":"id passed to the final_output_prep template; used there to construct unique file names", + "subst_constructor":{ + "vals":[ {"subst":"rpt"}, {"subst":"target_indicator"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"fopid_phix", + "comment":"id passed to the final_output_prep template; used there to construct unique file names", + 
"subst_constructor":{ + "vals":[ {"subst":"rpt"}, {"subst":"phix_indicator"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"fopid_es", + "comment":"id passed to the final_output_prep template; used there to construct unique file names", + "subst_constructor":{ + "vals":[ {"subst":"rpt"}, {"subst":"es_indicator"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"fopid_hs", + "comment":"id passed to the final_output_prep template; used there to construct unique file names", + "subst_constructor":{ + "vals":[ {"subst":"rpt"}, {"subst":"hs_indicator"} ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"final_output_prep_target_name","required":"no","default":"final_output_noalign_prep"}, + { + "id":"final_output_prep_target", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"final_output_prep_target_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"final_output_prep_phix_name","required":"no","default":"final_output_prep"}, + { + "id":"final_output_prep_phix", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"final_output_prep_phix_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + }, + {"id":"seqchksum_vtf","required":"no","default":"seqchksum_ehs"}, + { + "id":"seqchksum", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"seqchksum_vtf"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + } +], +"nodes":[ + { + "id":"src_bam", + "type":"INFILE", + "name":{"subst":"src_bam"}, + "description":"BAM using as input to this pipeline - expected to already contain PhiX (normally from hyb buffer spike-in) alignments" + }, + { + "id":"bamcollate2", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":["bamcollate2", "collate=1", "level=0"], + "comment":"ensure BAM records are gathered by template i.e. 
queryname" + }, + { + "id":"tee0", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", + "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__", "__EXTRA_SPLIT_OUT__" ] + }, + { + "id":"pre_alignment_es", + "type":"VTFILE", + "node_prefix":"prealnes_", + "name":{"subst":"pre_alignment_es"}, + "description":"subgraph containing pre alignment process" + }, + { + "id":"es_alignment_reference_genome", + "type":"INFILE", + "name":{"subst":"es_alignment_reference_genome"}, + "description":"Prefix for reference fasta and Bowtie2 index files" + }, + { + "id":"alignment_es", + "type":"VTFILE", + "comment":"inputs: _stdin_ (bam), reference; outputs: _stdout_ (bam)", + "node_prefix":"alnes_", + "subst_map":{"bwa_mem_T_value":{"subst":"bwa_mem_T_value_es"}}, + "name":{"subst":"alignment_es_vtf"}, + "description":"subgraph containing alignment process" + }, + { + "id":"reference_dict_es", + "type":"INFILE", + "name":{"subst":"reference_dict_es"}, + "description":"Extra metadata e.g. 
UR, M5 auxtags for BAM SQ header records" + }, + { + "id":"post_alignment_es", + "type":"VTFILE", + "comment":"inputs: _stdin_ (bam), reference_dict, no_align_bam; outputs: _stdout_ (bam)", + "node_prefix":"postalnes_", + "name":{"subst":"post_alignment_es"}, + "description":"subgraph containing post alignment process (target)" + }, + { + "id":"pre_alignment_hs", + "type":"VTFILE", + "node_prefix":"prealnhs_", + "name":{"subst":"pre_alignment_hs"}, + "description":"subgraph containing pre alignment process" + }, + { + "id":"hs_alignment_reference_genome", + "type":"INFILE", + "name":{"subst":"hs_alignment_reference_genome"}, + "description":"Prefix for reference fasta and Bowtie2 index files" + }, + { + "id":"alignment_hs", + "type":"VTFILE", + "comment":"inputs: _stdin_ (bam), reference; outputs: _stdout_ (bam)", + "node_prefix":"alnhs_", + "subst_map":{"bwa_mem_T_value":{"subst":"bwa_mem_T_value_hs"}}, + "name":{"subst":"alignment_hs_vtf"}, + "description":"subgraph containing alignment process" + }, + { + "id":"reference_dict_hs", + "type":"INFILE", + "name":{"subst":"reference_dict_hs"}, + "description":"Extra metadata e.g. 
UR, M5 auxtags for BAM SQ header records" + }, + { + "id":"post_alignment_hs", + "type":"VTFILE", + "comment":"inputs: _stdin_ (bam), reference_dict, no_align_bam; outputs: _stdout_ (bam)", + "node_prefix":"postalnhs_", + "name":{"subst":"post_alignment_hs"}, + "description":"subgraph containing post alignment process (target)" + }, + { + "id":"bamrecompress_input", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"bamrecompress verbose=0 numthreads=2" + }, + { + "id":"initial_phix_aln_bam", + "type":"RAFILE", + "name":{"subst":"initial_phix_aln_bam"} + }, + { + "id":"auxmerge_prep", + "type":"VTFILE", + "node_prefix":"amp_", + "name":{"subst":"auxmerge_prep"}, + "description":"subgraph containing pre alignment process" + }, + { + "id":"tee_amp", + "type":"RAFILE", + "name":{"subst":"ehs_intfile0"}, + "orig_type":"EXEC", + "orig_use_STDIN": true, + "orig_use_STDOUT": false, + "orig_comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", + "orig_cmd":["teepot", {"subst":"teepot_tempdir_flag"}, "-vv", "-w", "30000", "-m", "1G", "__EXTRA_SPLIT_OUT__", "__HUMAN_SPLIT_OUT__" ] + }, + { + "id":"alignment_filter", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "IN=__EXTRA_SPLIT_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "OUT=__EXTRA_SPLIT_OUTBAM__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS__"] + }, + { + "id":"af_metrics", + "type":"OUTFILE", + "name":{"subst":"af_metrics"} + }, + { + "id":"final_output_prep_target", + "type":"VTFILE", + "old_comment":"inputs: _stdin_ (bam), reference_genome_fasta (for scramble); outputs: _stdout_ (bam)", + "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (bam)", + "node_prefix":"foptgt_", + 
"subst_map":{"phix_or_target":{"subst":"target_indicator"},"fopid":{"subst":"fopid_tgt"},"bstmp":"bsfopt","brtmp":"brfopt","bmdtmp":"bmdfopt"}, + "name":{"subst":"final_output_prep_target"}, + "description":"subgraph containing post alignment_filter process (target)" + }, + { + "id":"final_output_prep_phix", + "type":"VTFILE", + "old_comment":"inputs: _stdin_ (bam), reference_genome_fasta (for scramble); outputs: _stdout_ (bam)", + "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (bam)", + "node_prefix":"fopphx_", + "subst_map":{"phix_or_target":{"subst":"phix_indicator"},"fopid":{"subst":"fopid_phix"},"bstmp":"bsfopp","brtmp":"brfopp","bmdtmp":"bmdfopp","scramble_reference_fasta":{"subst":"phix_reference_genome_fasta"}}, + "name":{"subst":"final_output_prep_phix"}, + "description":"subgraph containing post alignment_filter process (phix)" + }, + { + "id":"final_output_prep_es", + "type":"VTFILE", + "old_comment":"inputs: _stdin_ (bam), reference_genome_fasta (for scramble); outputs: _stdout_ (bam)", + "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (bam)", + "node_prefix":"fopes_", + "subst_map":{"phix_or_target":{"subst":"es_indicator"},"fopid":{"subst":"fopid_es"},"bstmp":"bsfoph","brtmp":"brfoph","bmdtmp":"bmdfoph","scramble_reference_fasta":{"subst":"es_reference_genome_fasta"}}, + "name":{"subst":"final_output_prep_phix"}, + "description":"subgraph containing post alignment_filter process (phix)" + }, + { + "id":"final_output_prep_hs", + "type":"VTFILE", + "old_comment":"inputs: _stdin_ (bam), reference_genome_fasta (for scramble); outputs: _stdout_ (bam)", + "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (bam)", + "node_prefix":"fophs_", + "subst_map":{"phix_or_target":{"subst":"hs_indicator"},"fopid":{"subst":"fopid_hs"},"bstmp":"bsfoph","brtmp":"brfoph","bmdtmp":"bmdfoph","scramble_reference_fasta":{"subst":"hs_reference_genome_fasta"}}, + "name":{"subst":"final_output_prep_phix"}, + "description":"subgraph containing post alignment_filter 
process (phix)" + }, + { + "id":"seqchksum", + "type":"VTFILE", + "comment":"inputs: _stdin_ (start bam), target_seqchksum, phix_seqchksum; outputs: NONE", + "node_prefix":"scs_", + "name":{"subst":"seqchksum"}, + "description":"subgraph containing seqchksum validation of outputs" + } +], +"edges":[ + { "id":"src_to_bc2", "from":"src_bam", "to":"bamcollate2" }, + { "id":"bc2_to_ti", "from":"bamcollate2", "to":"tee0" }, + { "id":"t0_to_bamcollate2_ranking_phix", "from":"tee0:__PHIX_ALN_OUT__", "to":"bamrecompress_input" }, + { "id":"brci_to_ipab", "from":"bamrecompress_input", "to":"initial_phix_aln_bam" }, + { "id":"ipab_to_bamreset_pre_aln_flt", "from":"initial_phix_aln_bam", "to":"auxmerge_prep" }, + { "id":"amp_to_tee_amp", "from":"auxmerge_prep", "to":"tee_amp" }, + { "id":"tee_amp_to_pae", "orig_from":"tee_amp:__EXTRA_SPLIT_OUT__", "from":"tee_amp", "to":"post_alignment_es:no_align_bam" }, + { "id":"tee_amp_to_pah", "orig_from":"tee_amp:__HUMAN_SPLIT_OUT__", "from":"tee_amp", "to":"post_alignment_hs:no_align_bam" }, + { "id":"t0_to_prealnes", "from":"tee0:__EXTRA_SPLIT_OUT__", "to":"pre_alignment_es" }, + { "id":"prealnes_to_alnes", "from":"pre_alignment_es", "to":"alignment_es" }, + { "id":"esref_to_alignment", "from":"es_alignment_reference_genome", "to":"alignment_es:reference" }, + { "id":"alignment_es_to_post_alignment_es", "from":"alignment_es", "to":"post_alignment_es" }, + { "id":"reference_dict_es_to_post_alignment", "from":"reference_dict_es", "to":"post_alignment_es:reference_dict" }, + { "id":"postalnes_to_alignment_filter", "from":"post_alignment_es", "to":"alignment_filter:__EXTRA_SPLIT_INBAM__" }, + { "id":"t0_to_prealnhs", "from":"tee0:__HUMAN_SPLIT_OUT__", "to":"pre_alignment_hs" }, + { "id":"prealnhs_to_alnhs", "from":"pre_alignment_hs", "to":"alignment_hs" }, + { "id":"hsref_to_alignment", "from":"hs_alignment_reference_genome", "to":"alignment_hs:reference" }, + { "id":"alignment_hs_to_post_alignment_hs", "from":"alignment_hs", 
"to":"post_alignment_hs" }, + { "id":"reference_dict_hs_to_post_alignment", "from":"reference_dict_hs", "to":"post_alignment_hs:reference_dict" }, + { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_INBAM__" }, + { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_INBAM__" }, + { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, + { "id":"af_to_fopt", "from":"alignment_filter", "to":"final_output_prep_target" }, + { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, + { "id":"af_to_fopes", "from":"alignment_filter:__EXTRA_SPLIT_OUTBAM__", "to":"final_output_prep_es" }, + { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_OUTBAM__", "to":"final_output_prep_hs" }, + { "id":"src_bam_to_seqchksum", "from":"src_bam", "to":"seqchksum" }, + { "id":"fopt_to_bam", "from":"final_output_prep_target", "to":"seqchksum:target_seqchksum" }, + { "id":"fopp_to_bam_phix", "from":"final_output_prep_phix", "to":"seqchksum:phix_seqchksum" }, + { "id":"fopp_to_bam_es", "from":"final_output_prep_es", "to":"seqchksum:es_seqchksum" }, + { "id":"fopp_to_bam_hs", "from":"final_output_prep_hs", "to":"seqchksum:hs_seqchksum" } +] +} diff --git a/data/vtlib/seqchksum_ehs.json b/data/vtlib/seqchksum_ehs.json new file mode 100644 index 000000000..fb723b0fb --- /dev/null +++ b/data/vtlib/seqchksum_ehs.json @@ -0,0 +1,70 @@ +{ +"version":"1.0", +"description":"steps in the alignment pipeline perform a checksum-based comparison of input and output (bam) data. 
Final validation step in alignment pipeline", +"subgraph_io":{ + "ports":{ + "inputs":{ + "_stdin_":"seqchksum_input", + "target_seqchksum":"merge_output_seqchksum:__TARGET_CHKSUM_IN__", + "es_seqchksum":"merge_output_seqchksum:__EXTRA_SPLIT_CHKSUM_IN__", + "hs_seqchksum":"merge_output_seqchksum:__HUMAN_SPLIT_CHKSUM_IN__", + "phix_seqchksum":"merge_output_seqchksum:__PHIX_CHKSUM_IN__" + } + } +}, +"subst_params":[ + { + "id": "basic_pipeline_params", + "type":"SPFILE", + "name":{"subst":"basic_pipeline_params_file"}, + "required": "no", + "comment":"this will expand to a set of subst_param elements" + }, + { "id":"rehash_acc_flds"}, + { "id":"rehash_chksum_flds"}, + { "id":"rehash_match_flds"}, + { "id":"rehash_acc_flag", "subst_constructor":{ "vals":[ "-a", {"subst":"rehash_acc_flds"} ], "postproc":{"op":"concat","pad":""} } }, + { "id":"rehash_chksum_flag", "subst_constructor":{ "vals":[ "-c", {"subst":"rehash_chksum_flds"} ], "postproc":{"op":"concat","pad":""} } }, + { "id":"rehash_match_flag", "subst_constructor":{ "vals":[ "-m", {"subst":"rehash_match_flds"} ], "postproc":{"op":"concat","pad":""} } }, + { + "id":"seqchksum_merge", + "subst_constructor":{ + "vals":[ + "seqchksum_merge.pl", + {"subst":"rehash_acc_flag"}, + {"subst":"rehash_chksum_flag"}, + {"subst":"rehash_match_flag"} + ], + "postproc":{"op":"pack"} + } + } +], +"nodes":[ + { + "id":"seqchksum_input", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"bamseqchksum" + }, + { + "id":"merge_output_seqchksum", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd":[ {"subst":"seqchksum_merge"}, "__TARGET_CHKSUM_IN__", "__HUMAN_SPLIT_CHKSUM_IN__", "__EXTRA_SPLIT_CHKSUM_IN__", "__PHIX_CHKSUM_IN__" ] + }, + { + "id":"cmp_seqchksum", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": false, + "cmd":"cmp __INPUTCHK_IN__ __OUTPUTCHK_IN__", + "description":"check input primary/sequence data matches output" + } +], +"edges":[ + { "id":"seqchksum_input_to_cmp", 
"from":"seqchksum_input", "to":"cmp_seqchksum:__INPUTCHK_IN__" }, + { "id":"bamcat_to_chk_output", "from":"merge_output_seqchksum", "to":"cmp_seqchksum:__OUTPUTCHK_IN__" } +] +} From 26eac3737d1f4129cc5add2553ef310d5ece37e4 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Wed, 5 Aug 2015 15:49:56 +0100 Subject: [PATCH 02/40] note extra split template --- Changes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changes b/Changes index 0820ac1e9..0688b46ab 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,8 @@ CHANGES LOG ----------- + - add (secondary stage) extra split template - remove human and another genome + release 0.17 - human split with no target align (secondary stage) template introduced - initial work for stage one (bcl, adapter, phiX and spatial filter to split processing) using templates From 989dde15f00a1852ccef76f4b4c3e46a4c30234f Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 23 Jul 2015 15:48:51 +0100 Subject: [PATCH 03/40] vtfp.pl - new errors/warnings/information (ewi) reporting - move finalise_cmd (undef removal, array flattening of cmd attributes of EXEC nodes) to process_vtnode() - correct finalise_cmd loop - subst_walk: undefined fetch_subst_value() is not automatically an error - EWI messages reported through logger - apply finalise_cmd processing to final flattened tree instead of at vtnode processing time - allow specification of "local" parameter substitution (within a specific vtnode) - rationalise parameter data structs - parameter specification from file and export of parameter specification to file (-param_vals and -export_param_vals command-line flags) - remove dead code, review relevance of comments, general tidying - always post-process cmd nodes (to pack arrays containing undef values) even when absolute_program_paths is false - add check to make sure that cmd nodes still have some content after packing - fetch_subst_value() handles recursion - use of -export_param_vals flag does not prevent normal completion of template 
processing - refactored fetch_subst_value() to ensure more consistent/intuitive evaluation of values in param->subst_constructor, param->default and subst->ifnull - added (optional) "ifnull" and "required" functions to subst directives - some correction of out-of-date comments --- bin/vtfp.pl | 739 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 578 insertions(+), 161 deletions(-) diff --git a/bin/vtfp.pl b/bin/vtfp.pl index 906f1f2a9..53d436401 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -20,6 +20,7 @@ use File::Slurp; use JSON; use Storable 'dclone'; +use Hash::Merge qw( merge ); our $VERSION = '0'; @@ -29,6 +30,10 @@ Readonly::Scalar my $VLMED => 2; Readonly::Scalar my $VLMAX => 3; +Readonly::Scalar my $EWI_INFO => 1; +Readonly::Scalar my $EWI_WARNING => 2; +Readonly::Scalar my $EWI_ERROR => 3; + Readonly::Scalar my $MIN_TEMPLATE_VERSION => 1; my $progname = (fileparse($0))[0]; @@ -37,15 +42,17 @@ my $help; my $strict_checks; my $outname; +my $export_param_vals; # file to export params_vals to my $template_path; my $logfile; my $verbosity_level; my $query_mode; my $absolute_program_paths=1; +my @param_vals_fns = (); # a list of input file names containing JSON-formatted params_vals data my @keys = (); my @vals = (); my @nullkeys = (); -GetOptions('help' => \$help, 'strict_checks!' => \$strict_checks, 'verbosity_level=i' => \$verbosity_level, 'template_path=s' => \$template_path, 'logfile=s' => \$logfile, 'outname:s' => \$outname, 'query_mode!' => \$query_mode, 'keys=s' => \@keys, 'values|vals=s' => \@vals, 'nullkeys=s' => \@nullkeys, 'absolute_program_paths!' => \$absolute_program_paths); +GetOptions('help' => \$help, 'strict_checks!' => \$strict_checks, 'verbosity_level=i' => \$verbosity_level, 'template_path=s' => \$template_path, 'logfile=s' => \$logfile, 'outname:s' => \$outname, 'query_mode!' 
=> \$query_mode, 'param_vals=s' => \@param_vals_fns, 'keys=s' => \@keys, 'values|vals=s' => \@vals, 'nullkeys=s' => \@nullkeys, 'export_param_vals:s' => \$export_param_vals, 'absolute_program_paths!' => \$absolute_program_paths); if($help) { croak q[Usage: ], $progname, q{ [-h] [-q] [-s] [-l ] [-o ] [-v ] [-keys -vals ...] }; @@ -54,8 +61,15 @@ # allow multiple options to be separated by commas @keys = split(/,/, join(',', @keys)); @vals = split(/,/, join(',', @vals)); +@param_vals_fns = split(/,/, join(',', @param_vals_fns)); + +my $params = initialise_params(\@keys, \@vals, \@nullkeys, \@param_vals_fns); -my $subst_requests = initialise_subst_requests(\@keys, \@vals, \@nullkeys); +if($export_param_vals) { + open my $epv, ">$export_param_vals" or croak "Failed to open $export_param_vals for export of param_vals"; + print $epv to_json($params); + close $epv or croak q[closing params export file]; +} $query_mode ||= 0; $verbosity_level = $VLMIN unless defined $verbosity_level; @@ -76,14 +90,22 @@ $template_path = []; } -my $param_store; -my $globals = { node_prefixes => { auto_node_prefix => 0, used_prefixes => {}}, vt_file_stack => [], processed_sp_files => {}, template_path => $template_path, }; +my $globals = { node_prefixes => { auto_node_prefix => 0, used_prefixes => {}}, vt_file_stack => [], vt_node_stack => [], processed_sp_files => {}, template_path => $template_path, }; + +my $node_tree = process_vtnode(q[], $vtf_name, q[], $params, $globals); # recursively generate the vtnode tree + +if(report_pv_ewi($node_tree, $logger)) { croak qq[Exiting after process_vtnode...\n]; } -my $node_tree = process_vtnode(q[], $vtf_name, q[], $param_store, $subst_requests, $globals); # recursively generate the vtnode tree my $flat_graph = flatten_tree($node_tree); -if($absolute_program_paths) { - foreach my $node_with_cmd ( grep {$_->{'cmd'}} @{$flat_graph->{'nodes'}}) { +foreach my $node_with_cmd ( grep {$_->{'cmd'}} @{$flat_graph->{'nodes'}}) { + + $node_with_cmd->{cmd} = 
finalise_cmd($node_with_cmd->{cmd}); + if(not defined $node_with_cmd->{cmd} or (ref $node_with_cmd->{cmd} eq q[ARRAY] and @{$node_with_cmd->{cmd}} < 1)) { + croak "command ", ($node_with_cmd->{id}? $node_with_cmd->{id}: q[NO ID]), " either empty or undefined"; + } + + if($absolute_program_paths) { my $cmd_ref = \$node_with_cmd->{'cmd'}; if(ref ${$cmd_ref} eq 'ARRAY') { $cmd_ref = \${${$cmd_ref}}[0]} ${$cmd_ref} =~ s/\A(\S+)/ abs_path( (-x $1 ? $1 : undef) || (which $1) || croak "cannot find program $1" )/e; @@ -96,48 +118,65 @@ # Done # ######## -########################################################################################## +######################################################################################### # process_vtnode: # vtnode_id - id of the VTFILE node; needed to resolve I/O connections # vtf_name - name of the file to read for this vtfile # node_prefix - if specified (note: zero-length string is "specified"), prefix all nodes # from this vtfile with this string; otherwise auto-generate prefix -# param_store - a list ref of maps of variable names to their values or constructor; +# params - a hash ref containing keys: +# param_store - a stack of maps of variable names to their values or constructor; # supplies the values when subst directives are processed -# subst_requests - a list ref of key/value pairs, keys are subst_param [var]names, values -# are string values; supplied at run time or via subst_map attributes -# in VTFILE nodes -# globals - auxiliary items used for error checking/reporting (and final flattening, e.g. 
-# node_prefix validation and generation for ensuring unique node ids in -# subgraphs) +# assign - a list ref of key/value pairs, keys are subst_param [var]names, values +# are string values or array refs of strings; supplied at run time or +# via subst_map attributes in VTFILE nodes +# assign_local - a hash ref, keys are colon-separated node paths (node id values) +# specifying the place where the key/values stored in the keys +# should be applied to override or supplement any parameter values +# added to the initial local subst_request entry created for +# VTFILE node expansion +# globals - auxiliary items used for error checking/reporting (and final flattening, +# e.g. node_prefix validation and generation for ensuring unique node ids +# in subgraphs) # # Description: # 1. read cfg for given vtf_name -# 2. process local subst_param section (if any), expanding SPFILE nodes and updating -# param_store +# 2. create and process local subst_param section (if any), expanding SPFILE nodes +# and updating param_store # 3. process subst directives (just nodes and edges) # 4. process nodes, expanding elements of type VTFILE (note: there will be param_store -# and subst_request lists, containing as many entries as the current depth of VTFILE -# nesting) +# and subst_request lists, containing as many entries as the current depth of +# VTFILE nesting) # # Returns: root of tree of vtnodes (for later flattening) -########################################################################################## +######################################################################################### sub process_vtnode { - my ($vtnode_id, $vtf_name, $node_prefix, $param_store, $subst_requests, $globals) = @_; + my ($vtnode_id, $vtf_name, $node_prefix, $params, $globals) = @_; + + my $vtnode = { + id => $vtnode_id, + name => $vtf_name, + cfg => {}, + children => [], + ewi => mkewi(q[node:] . ($vtnode_id? $vtnode_id: q[TOP]) . q[ (name: ] . ($vtf_name? $vtf_name: q[unspec]) . 
q[)]) }; unless(is_valid_name($vtf_name)) { - $logger->($VLFATAL, q[Missing or invalid name for VTFILE element id], $vtnode_id, q[ (], , join(q[->], @{$globals->{vt_file_stack}}), q[)]); + $vtnode->{ewi}->{additem}->($EWI_ERROR, 0, q[Missing or invalid name for VTFILE element id: ], $vtnode_id, q[ (], , join(q[->], @{$globals->{vt_file_stack}}), q[)]); + + return $vtnode; } if(any { $_ eq $vtf_name} @{$globals->{vt_file_stack}}) { - $logger->($VLFATAL, q[Nesting of VTFILE ], $vtf_name, q[ within itself: ], join(q[->], @{$globals->{vt_file_stack}})); + $vtnode->{ewi}->{additem}->($EWI_ERROR, 0, q[Nesting of VTFILE ], $vtf_name, q[ within itself: ], join(q[->], @{$globals->{vt_file_stack}})); + + return $vtnode; } - my $vtnode = { id => $vtnode_id, name => $vtf_name, cfg => {}, children => [], }; $vtnode->{node_prefix} = get_node_prefix($node_prefix, $globals->{node_prefixes}); $vtnode->{cfg} = read_vtf_version_check($vtf_name, $MIN_TEMPLATE_VERSION, $globals->{template_path}, ); - $param_store = process_subst_params($param_store, $subst_requests, $vtnode->{cfg}->{subst_params}, [ $vtf_name ], $globals); - apply_subst($vtnode->{cfg}, $param_store, $subst_requests); # process any subst directives in cfg (just nodes and edges) + $params = process_subst_params($params, $vtnode->{cfg}->{subst_params}, [ $vtf_name ], $globals, $vtnode->{ewi}); + + apply_subst($vtnode->{cfg}, $params, $vtnode->{ewi}); # process any subst directives in cfg (just nodes and edges) my @vtf_nodes = (); my @nonvtf_nodes = (); @@ -148,24 +187,34 @@ sub process_vtnode { $vtnode->{cfg}->{nodes} = [ @nonvtf_nodes ]; push @{$globals->{vt_file_stack}}, $vtf_name; + push @{$globals->{vt_node_stack}}, $vtnode_id; for my $vtf_node (@vtf_nodes) { # both subst_requests and param_stores have local components my $sr = $vtf_node->{subst_map}; $sr ||= {}; - unshift @$subst_requests, $sr; + + # now update with any "localised" subst_requests from the command-line (replace, not supplement) + my $local_env_key = 
join(q[:], @{$globals->{vt_node_stack}}) . q{:} . $vtf_node->{id}; + $local_env_key = substr($local_env_key, 1); # remove initial : + if(my $smo = $params->{assign_local}->{$local_env_key}) { + @{$sr}{keys %{$smo}} = values %{$smo}; + } + + unshift @{$params->{assign}}, $sr; my $ps = { varnames => {}, }; - unshift @$param_store, $ps; + unshift @{$params->{param_store}}, $ps; - my $vtc = process_vtnode($vtf_node->{id}, $vtf_node->{name}, $vtf_node->{node_prefix}, $param_store, $subst_requests, $globals); + my $vtc = process_vtnode($vtf_node->{id}, $vtf_node->{name}, $vtf_node->{node_prefix}, $params, $globals); - shift @$param_store; - shift @$subst_requests; + shift @{$params->{param_store}}; + shift @{$params->{assign}}; push @{$vtnode->{children}}, $vtc; } pop @{$globals->{vt_file_stack}}; + pop @{$globals->{vt_node_stack}}; return $vtnode; } @@ -197,12 +246,17 @@ sub get_node_prefix { ####################################################################################### # process_subst_params: -# param_store - a list (ref) of maps of variable names to their values or constructor; +# params - a hash ref containing keys: +# param_store - a stack of maps of variable names to their values or constructor; # supplies the values when subst directives are processed -# subst_requests - a list (ref) of key/value pairs. 
Keys are subst_param varnames, -# values are string values; supplied at run time or via subst_map -# attributes in VTFILE nodes; used here to expand subst directives -# that appear in subst_param entries +# assign - a list ref of key/value pairs, keys are subst_param [var]names, values +# are string values or array refs of strings; supplied at run time or +# via subst_map attributes in VTFILE nodes +# assign_local - a hash ref, keys are colon-separated node paths (node id values) +# specifying the place where the key/values stored in the keys +# should be applied to override or supplement any parameter values +# added to the initial local subst_request entry created for +# VTFILE node expansion # unprocessed_subst_params - the list of subst_param entries to process; either of # type PARAM (describes how to retrieve/construct the value for the # specified varname) or SPFILE (specifies a file containing @@ -211,26 +265,26 @@ sub get_node_prefix { # warning/error reporting # globals - used here to prevent multiple processing of SPFILE nodes and to pass the # value of template_path +# ewi - record Error/Warning/Info messages here # # Description: # process a subst_param section, adding any varnames declared in it to the "local" -# param_store and recursively processing any included files specified by elements -# of type SPFILE. +# param_store (element 0 in the param_store stack) and recursively processing any +# included files specified by elements of type SPFILE. # # In other words, step through unprocessed subst_param entries: # a) if element is of type PARAM, add it to the "local" param_store # b) if element is of type SPFILE, [queue it up for] make a recursive call to # process_subst_params() to expand it # -# A stack of spfile names is passed to recursive calls to allow construction of -# error strings for later reporting (though initially just croak). 
Consider a slightly -# more sophisticated structure for elements on this stack to improve error reporting +# A stack of spfile names is passed to recursive calls to allow construction of error +# strings for later reporting. ####################################################################################### sub process_subst_params { - my ($param_store, $subst_requests, $unprocessed_subst_params, $sp_file_stack, $globals) = @_; + my ($params, $unprocessed_subst_params, $sp_file_stack, $globals, $ewi) = @_; my @spfile_node_queue = (); - $param_store ||= [ { varnames => {}, } ]; + my $param_store = $params->{param_store}; for my $i (0..$#{$unprocessed_subst_params}) { @@ -239,25 +293,25 @@ sub process_subst_params { my $sptype = $sp->{type}; $sptype ||= q[PARAM]; - if($sptype eq q[SPFILE]) { # process recursively + if($sptype eq q[SPFILE]) { # process recursively # SPFILE entries will be processed after all PARAM-type entries have been processed (for consistency in redeclaration behaviour) push @spfile_node_queue, $sp; } elsif($sptype eq q[PARAM]) { # all unprocessed_subst_params elements of type PARAM must have an id if(not $spid) { - # it would be better to cache these errors and report as many as possible before exit (TBI) - $logger->($VLFATAL, q[No id for PARAM element, entry ], $i, q[ (], , join(q[->], @$sp_file_stack), q[)]); + # cache errors so we can report as many as possible before exit + $ewi->{additem}->($EWI_ERROR, 0, q[No id for PARAM element, entry ], $i, q[ (], , join(q[->], @$sp_file_stack), q[)]); } my $ips = in_param_store($param_store, $spid); if($ips->{errnum} != 0) { # multiply defined - OK unless explicitly declared multiple times at this level if($ips->{errnum} > 0) { # a previous declaration was made by an ancestor of the current vtnode - $logger->($VLMED, qq[INFO: Duplicate subst_param definition for $spid (], join(q[->], @$sp_file_stack), q[); ], $ips->{ms}); + $ewi->{additem}->($EWI_INFO, 2, qq[INFO: Duplicate subst_param definition 
for $spid (], join(q[->], @$sp_file_stack), q[); ], $ips->{ms}); } else { - # it would be better to cache these errors and report as many as possible before exit (TBI) - $logger->($VLFATAL, qq[Fatal error: Duplicate (local) subst_param definition for $spid (], join(q[->], @$sp_file_stack), q[); ], $ips->{ms}); + # cache errors so we can report as many as possible before exit + $ewi->{additem}->($EWI_ERROR, 0, qq[Fatal error: Duplicate (local) subst_param definition for $spid (], join(q[->], @$sp_file_stack), q[); ], $ips->{ms}); } } @@ -266,7 +320,7 @@ sub process_subst_params { $param_store->[0]->{varnames}->{$spid} = $sp; # adding to the "local" variable store } else { - $logger->($VLFATAL, q[Unrecognised type for subst_param element: ], $sptype, q[; entry ], $i, q[ (], , join(q[->], @$sp_file_stack), q[)]); + $ewi->{additem}->($EWI_ERROR, 0, q[Unrecognised type for subst_param element: ], $sptype, q[; entry ], $i, q[ (], , join(q[->], @$sp_file_stack), q[)]); } } @@ -274,14 +328,14 @@ sub process_subst_params { # now process the SPFILE entries ################################ for my $spfile (@spfile_node_queue) { - subst_walk($spfile, $param_store, $subst_requests, []); + my $ewi = mkewi(q[SPF]); + subst_walk($spfile, $params, [], $ewi); my $spname = is_valid_name($spfile->{name}); if(not $spname) { # it would be better to cache these errors and report as many as possible before exit (TBI) - $logger->($VLFATAL, q[Missing or invalid name for SPFILE element id], $spfile->{id}, q[ (], , join(q[->], @$sp_file_stack), q[)]); + $ewi->{additem}->($EWI_ERROR, 0, q[Missing or invalid name for SPFILE element id], $spfile->{id}, q[ (], , join(q[->], @$sp_file_stack), q[)]); } - - if(not $globals->{processed_sp_files}->{$spname}) { # but only process a given SPFILE once + elsif(not $globals->{processed_sp_files}->{$spname}) { # but only process a given SPFILE once $globals->{processed_sp_files}->{$spname} = 1; # flag this SPFILE name as seen my $cfg = 
read_vtf_version_check($spname, $MIN_TEMPLATE_VERSION, $globals->{template_path},); @@ -290,16 +344,16 @@ sub process_subst_params { # files must contain (new-style) subst_param sections to be useful if(defined $cfg->{subst_params}) { push @$sp_file_stack, $spname; - process_subst_params($param_store, $subst_requests, $cfg->{subst_params}, $sp_file_stack, $globals); + process_subst_params($params, $cfg->{subst_params}, $sp_file_stack, $globals, $ewi); pop @$sp_file_stack; } } else { - $logger->($VLMAX, qq[INFO: Not processing reoccurrence of SPFILE $spname (], join(q[->], @$sp_file_stack), q[)]); # needs to be a high-verbosity warning + $ewi->{additem}->($EWI_INFO, 3, qq[INFO: Not processing reoccurrence of SPFILE $spname (], join(q[->], @$sp_file_stack), q[)]); } } - return $param_store; + return $params; } ########################################################################## @@ -335,20 +389,23 @@ sub in_param_store { # replace subst directives with values ####################################### sub apply_subst { - my ($cfg, $param_store, $subst_requests) = @_; # process any subst directives in cfg (just nodes and edges?) + my ($cfg, $params, $ewi) = @_; # process any subst directives in cfg (just nodes and edges?) for my $elem (@{$cfg->{nodes}}, @{$cfg->{edges}}) { - subst_walk($elem, $param_store, $subst_requests, []); + $ewi->{addlabel}->(q{assigning to id:[} . $elem->{id} . q{]}); + subst_walk($elem, $params, [], $ewi); + $ewi->{removelabel}->(); } + + return; } -############################################################################################################## +############################################################################################## # subst_walk: -# walk the given element, looking for "subst" directives. 
When found search the param_store and subst_request -# lists for the desired key/value pair -############################################################################################################## +# walk the given element, looking for "subst" directives. When found search params for value. +############################################################################################## sub subst_walk { - my ($elem, $param_store, $subst_requests, $labels) = @_; + my ($elem, $params, $labels, $ewi) = @_; my $r = ref $elem; if(!$r) { @@ -360,13 +417,13 @@ sub subst_walk { if(ref $elem->{$k} eq q[HASH] and my $param_name = $elem->{$k}->{subst}) { # value for a "subst" key must always be the name of a parameter if(ref $param_name) { - $logger->($VLFATAL, q[value for a subst directive must be a param (not a reference), key for subst is: ], $k); + $ewi->{additem}->($EWI_ERROR, 0, q[value for a subst directive must be a param (not a reference), key for subst is: ], $k); } - $elem->{$k} = fetch_subst_value($param_name, $param_store, $subst_requests); + $elem->{$k} = fetch_subst_value($elem->{$k}, $params, $ewi); - unless(defined $elem->{$k}) { - $logger->($VLFATAL, croak q[Failed to fetch subst value for parameter ], $param_name, q[ (key was ], $k, q[)]); + unless(defined $elem->{$k}) { # this has been changed to INFO. 
If ERROR is wanted, required attribute should be set so that fetch_subst_value() flags it + $ewi->{additem}->($EWI_INFO, 1, q[Failed to fetch subst value for parameter ], $param_name, q[ (key was ], $k, q[)]); } next; @@ -374,7 +431,7 @@ sub subst_walk { if(ref $elem->{$k}) { push @$labels, $k; - subst_walk($elem->{$k}, $param_store, $subst_requests, $labels); + subst_walk($elem->{$k}, $params, $labels, $ewi); pop @$labels; } } @@ -385,10 +442,10 @@ sub subst_walk { if(ref $elem->[$i] eq q[HASH] and my $param_name = $elem->[$i]->{subst}) { # value for a "subst" key must always be the name of a parameter if(ref $param_name) { - $logger->($VLFATAL, q[value for a subst directive must be a param name (not a reference), index for subst is: ], $i); + $ewi->{additem}->($EWI_ERROR, 0, q[value for a subst directive must be a param name (not a reference), index for subst is: ], $i); } - my $sval = fetch_subst_value($param_name, $param_store, $subst_requests); + my $sval = fetch_subst_value($elem->[$i], $params, $ewi); if(ref $sval eq q[ARRAY]) { splice @$elem, $i, 1, @$sval; } @@ -396,8 +453,8 @@ sub subst_walk { $elem->[$i] = $sval; } - unless(defined $elem->[$i]) { - $logger->($VLFATAL, q[Failed to fetch subst value for parameter ], $param_name, q[ (element index was ], $i); + unless(defined $elem->[$i]) { # this has been changed to INFO. 
If ERROR is wanted, required attribute should be set so that fetch_subst_value() flags it + $ewi->{additem}->($EWI_INFO, 1, q[Failed to fetch subst value for parameter ], $param_name, q[ (element index was ], $i, q[)],); } next; @@ -405,7 +462,7 @@ sub subst_walk { if(ref $elem->[$i]) { push @$labels, sprintf(q[ArrayElem%03d], $i); - subst_walk($elem->[$i], $param_store, $subst_requests, $labels); + subst_walk($elem->[$i], $params, $labels, $ewi); pop @$labels; } } @@ -413,7 +470,7 @@ sub subst_walk { elsif(ref $elem eq q[JSON::XS::Boolean]) { } else { - $logger->($VLMED, "REF TYPE $r currently not processable"); + $ewi->{add_item}->($EWI_WARNING, 2, "REF TYPE $r currently not processable"); } return; @@ -425,116 +482,243 @@ sub subst_walk { # a value for the given param_name. The _value attribute of a # param_entry caches successfully resolved values. # -# 1. Search the param_store for an entry for this param_name. -# 2. If there isn't a param_store entry, add [an unset] one. -# 3. If the param_entry _value attribute is set, return that. -# 4. Search subst_requests for a value for this param_name. If -# one is found, return it. -# 5. Try evaluating the param_entry. If it resolves, return that -# value. -# 6. If a default value value was specified in the param_entry, -# return that. -# 7. If the required attribute of the param_entry is true, -# it is a fatal error; otherwise return undef +# 1. If the value has already been resolved in the local +# param_store, return that value. +# 2. Search the param_store stack for an entry for this +# param_name, working outwards from the local level0 +# param_store +# 3. if only a non-local entry is found, copy it to the local +# param_store; if no entry is found, create one in the +# local param_store +# 4. search the assign/subst_requests stack (from local outward) +# for user-specified value assignment - these will override +# any other assignments (e.g. 
defaults, subst_maps or +# subst_constructors specified in the template). If a value +# is found, return it. +# 5. If the parameter has a subst_constructor attribute, use +# that to construct the value [and return it]. +# 6. If the value is still undefined, evaluate the param_entry's +# default attribute (if any) +# 7. If the value is still undefined, evaluate the subst entry's +# ifnull attribute (if any). Note: this value should not +# be cached to the _value attribute of the param_entry. +# 8. If the value is still undefined, flag an error if the +# substitution is flagged as required. +# 9. If the value is still undefined, flag an error if the +# parameter is flagged as required. ################################################################## sub fetch_subst_value { - my ($param_name, $param_store, $subst_requests) = @_; + my ($subst, $params, $ewi, $irp) = @_; my $param_entry; my $retval; + # check to see if an sp_expr needs evaluating + if(ref $subst->{subst}) { # subst name is itself an expression which needs evaluation + $subst->{subst} = fetch_sp_value($subst->{subst}, $params, $ewi, $irp); + } + + if(ref $subst->{subst}) { # TODO - consider implications of allowing an array here + $ewi->{additem}->($EWI_ERROR, 0, q[subst value cannot be a ref (type: ], ref $subst->{subst}, q[)]); + return; + } + + my $param_name = $subst->{subst}; + + if(defined $irp and any { $_ eq $param_name} @{$irp}) { # infinite recursion prevention + $ewi->{additem}->($EWI_ERROR, 0, q[infinite recursion detected resolving parameter ], $param_name, q[ (], join(q/=>/, (@{$irp}, $param_name)), q[)]); + return; + } + + my $param_store = $params->{param_store}; + + if(defined $param_store->[0]->{varnames}->{$param_name} and exists $param_store->[0]->{varnames}->{$param_name}->{_value}) { # allow undef value + + if(not defined $param_store->[0]->{varnames}->{$param_name}->{_value} and defined $subst->{required} and $subst->{required} eq q[yes]) { + $ewi->{additem}->($EWI_ERROR, 0, 
q[Undef value specified for required subst (param_name: ], $param_name, q[)]); + } + + return $param_store->[0]->{varnames}->{$param_name}->{_value}; # already evaluated, return cached value + } + for my $ps (@$param_store) { - $param_entry = $ps->{varnames}->{$param_name}; - if($param_entry) { last; } + if(exists $ps->{varnames}->{$param_name} and $ps->{varnames}->{$param_name}->{id} eq $param_name) { + $param_entry = $ps->{varnames}->{$param_name}; + last; + } } if(not defined $param_store->[0]->{varnames}->{$param_name}) { # create a "writeable" param_store entry at local level - my $new_param_entry = (not defined $param_entry)? { name => $param_name, _declared_by => [], }: dclone $param_entry; + my $new_param_entry = (not defined $param_entry)? { id => $param_name, _declared_by => [], }: dclone $param_entry; $param_store->[0]->{varnames}->{$param_name} = $new_param_entry; # adding to the "local" variable store $param_entry = $new_param_entry; } - if(defined $param_entry->{_value}) { - return $param_entry->{_value}; # already evaluated, no need to do again - } + # at this point, we have either found or created the param_entry in the local param_store. 
(We don't want to write to + # a higher-level param_store entry) + # before checking for a cached _value, see if there are local overrides (either via subst_map or from command-line) + my $subst_requests = $params->{assign}; for my $sr (@$subst_requests) { if(exists $sr->{$param_name}) { # allow undef value $param_entry->{_value} = $sr->{$param_name}; + + if(not defined $param_entry->{_value} and defined $subst->{required} and $subst->{required} eq q[yes]) { + $ewi->{additem}->($EWI_ERROR, 0, q[Undef value specified for required subst (param_name: ], $param_name, q[)]); + } + return $sr->{$param_name}; } } - if($param_entry->{subst_constructor}) { - my $vals; - unless($vals = $param_entry->{subst_constructor}->{vals}) { - $logger->($VLFATAL, q[subst_constructor attribute requires a vals attribute, param_name: ], $param_name); - } + if(defined $param_entry->{_value}) { + return $param_entry->{_value}; # already evaluated, return cached value + } + + $retval = resolve_subst_constructor($param_name, $param_entry->{subst_constructor}, $params, $ewi, $irp); + + if(not defined $retval) { + $retval = resolve_param_default($param_name, $param_entry->{default}, $params, $ewi, $irp); + } - unless(ref $vals eq q[ARRAY]) { - $logger->($VLFATAL, q[subst_constructor vals attribute must be array, param_name: ], $param_name); + if(not defined $retval) { + if($retval = resolve_ifnull($param_name, $subst->{ifnull}, $params, $ewi, $irp)) { + return $retval; # note: result of ifnull evaluation not assigned to variable } + elsif($subst->{required} and ($subst->{required} eq q[yes])) { + $ewi->{additem}->($EWI_ERROR, 0, q[No value found for required subst (param_name: ], $param_name, q[)]); + return; + } + } - for my $i (reverse (0..$#$vals)) { - if(ref $vals->[$i] eq q[HASH] and $vals->[$i]->{subst}) { - $vals->[$i] = fetch_subst_value($vals->[$i]->{subst}, $param_store, $subst_requests); - if(ref $vals->[$i] eq q[ARRAY]) { - splice(@$vals, $i, 1, (@{$vals->[$i]})); - } + if(not 
defined $retval) { + # caller should decide if undef is allowed, unless required is true + my $severity = (defined $param_entry->{required} and $param_entry->{required} eq q[yes])? $EWI_ERROR: $EWI_INFO; + $ewi->{additem}->($severity, 0, q[No value found for param_entry ], $param_name); + return; + } + + $param_entry->{_value} = $retval; + + return $retval; +} + +sub fetch_sp_value { + my ($sp_expr, $params, $ewi, $irp) = @_; + my $param_entry; + my $retval; + + my $sper = ref $sp_expr; + if($sper) { + if($sper eq q[HASH]) { + if($sp_expr->{subst}) { + # subst directive + $retval = fetch_subst_value($sp_expr, $params, $ewi, $irp); + } + elsif($sp_expr->{subst_constructor}) { + # solo subst_constructor + $retval = resolve_subst_constructor(q[ID], $sp_expr->{subst_constructor}, $params, $ewi); + } + else { + # ERROR - unrecognised hash ref type } } + elsif($sper eq q[ARRAY]) { + process_array($sp_expr, $params, $ewi, $irp); + } + else { + # ERROR - unrecognised ref type + } + } + else { + return $sp_expr; + } +} - $retval = resolve_subst_array($param_entry, $vals); +sub resolve_subst_constructor { + my ($id, $subst_constructor, $params, $ewi, $irp) = @_; - if(not defined $retval) { - $retval = $param_entry->{default}; - } - if(not defined $retval) { - # caller should decide if undef is allowed, unless required is true - my $severity = (defined $param_entry->{required} and $param_entry->{required} eq q[yes])? 
$VLFATAL: $VLMED; - $logger->($severity, q[INFO: Undefined elements in subst_param array: ], $param_entry->{id}); - return; - } + if(not defined $subst_constructor) { return; } + + my $vals; + unless($vals = $subst_constructor->{vals}) { + $ewi->{additem}->($EWI_ERROR, 0, q[subst_constructor attribute requires a vals attribute, param_name: ], $id); + return; } - elsif(defined $param_entry->{default}) { - $param_entry->{_value} = $param_entry->{default}; # be careful here - don't set _value for a higher-level param_store - return $param_entry->{default}; + + unless(ref $vals eq q[ARRAY]) { + $ewi->{additem}->($EWI_ERROR, 0, q[subst_constructor vals attribute must be array, param_name: ], $id); + return; } - else { - # caller should decide if undef is allowed, unless required is true - my $severity = (defined $param_entry->{required} and $param_entry->{required} eq q[yes])? $VLFATAL: $VLMED; - $logger->($severity, q[No value found for param_entry ], $param_name); + + $vals = process_array($vals, $params, $ewi, $irp); + if(not defined $vals) { + $ewi->{additem}->($EWI_ERROR, 0, q[Error B processing subst_constructor, param_name: ], $id); return; } - $param_entry->{_value} = $retval; # be careful here - don't set _value for a higher-level param_store + return postprocess_subst_array($id, $subst_constructor, $ewi); +} - return $retval; +################################## +# process_array +# flatten any non-scalar elements +################################## +sub process_array { + my ($arr, $params, $ewi, $irp) = @_; + + for my $i (reverse (0..$#$arr)) { + if(ref $arr->[$i] eq q[HASH]) { + if($arr->[$i]->{subst}) { + $arr->[$i] = fetch_subst_value($arr->[$i], $params, $ewi, $irp); + } + else { + $ewi->{additem}->($EWI_ERROR, 0, q[Non-subst hash ref not permitted in array, element ], $i); + return; + } + } + + if(ref $arr->[$i] eq q[ARRAY]) { + $arr->[$i] = process_array($arr->[$i], $params, $ewi, $irp); # in case the element was a simple array ref, not a subst directive 
+ splice(@$arr, $i, 1, (@{$arr->[$i]})); + } + } + + return $arr; } -####################################################################################################### -# resolve_subst_array: +################################################################################################## +# postprocess_subst_array: # caller will have already flattened the array (i.e. no ref elements) # process as specified by op directives (pack, concat,...) -# validate proposed substitution value -# 1. if it contains any undef elements, it is invalid. -# 2. if it contains any null string elements but no allow_null_strings opt, it is invalid. (TBI) -####################################################################################################### -sub resolve_subst_array { - my ($subst_param, $subst_value) = @_; +# validate proposed substitution value. If it contains any undef elements, it is invalid (caller +# determines severity of error) +################################################################################################## +sub postprocess_subst_array { + my ($param_id, $subst_constructor, $ewi) = @_; + my $subst_value=$subst_constructor->{vals}; if(ref $subst_value ne q[ARRAY]) { - $logger->($VLMIN, q[Attempt to substitute array for non-array in substitutable param (], - $subst_param->{param_name}, - q[ for ], $subst_param->{attrib_name}, - q[ in ], ($subst_param->{parent_id}? 
$subst_param->{parent_id}: q[UNNAMED_PARENT]), q[)]); + $ewi->{additem}->($EWI_INFO, 0, q[vals attribute must be an array ref (param: ], $param_id, q[)]); return; } - my $subst_constructor = $subst_param->{subst_constructor}; my $ops=$subst_constructor->{postproc}->{op}; - if(defined $ops and ref $ops ne q[ARRAY]) { $ops = [ $ops ]; } + if(defined $ops) { + my $ro = ref $ops; + if(not $ro) { + $ops = [ $ops ]; + } + elsif($ro ne q[ARRAY]) { + $ewi->{additem}->($EWI_INFO, 0, q[ops attribute must be either scalar or array ref (not ], $ro, q[ ref) - disregarding]); + + $ops = []; + } + } + else { + $ops = []; + } # if (post-pack) array contains undefs, it is invalid if(any { ! defined($_) } @$subst_value) { @@ -543,7 +727,7 @@ sub resolve_subst_array { } else { # decision about fatality should be left to the caller - $logger->($VLMED, q[INFO: Undefined elements in subst_param array: ], $subst_param->{id}); + $ewi->{additem}->($EWI_INFO, 0, q[INFO: Undefined elements in subst_param array: ], $param_id); return; } } @@ -565,13 +749,44 @@ sub resolve_subst_array { $subst_value = join $pad, @$subst_value; } else { - $logger->($VLFATAL, q[Unrecognised op: ], $op, q[ in subst_param: ], $subst_param->{param_name}); + $ewi->{additem}->($EWI_ERROR, 0, q[Unrecognised op: ], $op, q[ in subst_param: ], $param_id); } } return $subst_value; } +sub resolve_param_default { + my ($id, $default, $params, $ewi, $irp) = @_; + + if(not defined $default) { return; } + + return fetch_sp_value($default, $params, $ewi, $irp); +} + +sub resolve_ifnull { + my ($id, $ifnull, $params, $ewi, $irp) = @_; + + if(not defined $ifnull) { return; } + + return fetch_sp_value($ifnull, $params, $ewi, $irp); +} + + +sub report_pv_ewi { + my ($tree_node, $logger) = @_; + my $fatality = 0; + + if($tree_node->{ewi}->{report}->(0, $logger)) { $fatality = 1; } + + # do the same recursively for any children + for my $tn (@{$tree_node->{children}}) { + if($tn->{ewi}->{report}->(0, $logger)) { $fatality = 1; } + } 
+ + return $fatality; # should return some kind of error indicator, I think +} + ####################################################################################### # flatten_tree: # @@ -725,36 +940,150 @@ sub get_child_prefix { return $child? $child->{node_prefix}: q[]; } -##################################################################### -# initialise_subst_requests: -# if a key is specified more than once, its value becomes a list ref -##################################################################### -sub initialise_subst_requests { +###################################################################### +# initialise_params: +# Record any parameter values set from the command line. A separate +# store is used for "localised" parameter setting (ones applied when +# subst_requests store for VTFILE expansion is set up). +# If a key is specified more than once, its value becomes a list ref. +# an empty initial param_store is added. +###################################################################### +sub initialise_params { + my ($keys, $vals, $nullkeys, $param_vals_fns) = @_; + + my $pv = {}; + + $pv = construct_pv($keys, $vals, $nullkeys); + + return combine_pvs($param_vals_fns, $pv); +} + +sub construct_pv { my ($keys, $vals, $nullkeys) = @_; - my %subst_requests = (); + my $pv; + my $subst_requests = {}; + my $subst_map_overrides = {}; if(@$keys != @$vals) { croak q[Mismatch between keys and vals]; } for my $nullkey (@$nullkeys) { - $subst_requests{$nullkey} = undef; + $subst_requests->{$nullkey} = undef; } - for my $i (0..$#{$keys}) { - if(defined $subst_requests{$keys->[$i]}) { - if(ref $subst_requests{$keys->[$i]} ne q[ARRAY]) { - $subst_requests{$keys->[$i]} = [ $subst_requests{$keys->[$i]} ]; + if(@{$keys}) { + for my $i (0..$#{$keys}) { + my ($locality, $param_name) = _parse_localised_param_name($keys->[$i]); + my $param_value = $vals->[$i]; + + if($locality) { + # put it in the subst_map_overrides + if(defined 
$subst_map_overrides->{$locality}->{$param_name}) { + if(ref $subst_map_overrides->{$locality}->{$param_name} ne q[ARRAY]) { + $subst_map_overrides->{$locality}->{$param_name} = [ $subst_map_overrides->{$locality}->{$param_name} ]; + } + + push @{$subst_map_overrides->{$locality}->{$param_name}}, $param_value; + } + else { + $subst_map_overrides->{$locality}->{$param_name} = $param_value; + } } + elsif(defined $subst_requests->{$param_name}) { + if(ref $subst_requests->{$param_name} ne q[ARRAY]) { + $subst_requests->{$param_name} = [ $subst_requests->{$param_name} ]; + } - push @{$subst_requests{$keys->[$i]}}, $vals->[$i]; + push @{$subst_requests->{$param_name}}, $param_value; + } + else { + $subst_requests->{$param_name} = $param_value; + } } - else { - $subst_requests{$keys->[$i]} = $vals->[$i]; + + $pv = { param_store => [], assign => [ $subst_requests ], assign_local => $subst_map_overrides, }; + } + + return $pv; +} + +################################################################################### +# combine_pvs: +# parameters: +# param_vals_fns - ref to array of file names (JSON) containing parameter values +# clpv - parameter value structure created from command-line (optional) +# +# Combine a set of parameter value specifications, from files and/or command-line +################################################################################### +sub combine_pvs { + my ($param_vals_fns, $clpv) = @_; + my $target = {}; + my @all_pvs = (); + + # read the pv data from files, add to list + for my $fn (@{$param_vals_fns}) { + + if(! 
-e ${fn}) { + carp qq[Failed to find file $fn]; + next; } + + my $pv = from_json(read_file($fn)); + + push @all_pvs, $pv; + } + if($clpv) { + push @all_pvs, $clpv; # add parameter value structure created from command-line + } + +# Hash::Merge::set_behavior( 'RIGHT_PRECEDENT' ); +# Hash::Merge::set_behavior( 'LEFT_PRECEDENT' ); + # merge user-supplied params files with slightly modified RIGHT_PRECEDENT behaviour + Hash::Merge::specify_behavior( + { + 'SCALAR' => { + 'SCALAR' => sub { $_[1] }, + 'ARRAY' => sub { $_[1] }, # differs from RIGHT_PRECEDENT + 'HASH' => sub { $_[1] }, + }, + 'ARRAY' => { + 'SCALAR' => sub { $_[1] }, + 'ARRAY' => sub { [ @{ $_[0] }, @{ $_[1] } ] }, + 'HASH' => sub { $_[1] }, + }, + 'HASH' => { + 'SCALAR' => sub { $_[1] }, + 'ARRAY' => sub { $_[1] }, # differs from RIGHT_PRECEDENT + 'HASH' => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, + }, + }, + 'My Behavior', + ); + for my $pv (@all_pvs) { + + $target->{assign} = [ merge($target->{assign}->[0], $pv->{assign}->[0]) ]; + $target->{assign_local} = merge($target->{assign_local}, $pv->{assign_local}); } - return [ \%subst_requests ]; # note: the return value is a ref to a list of hash refs + $target->{assign} ||= []; + $target->{assign_local} ||= {}; + $target->{param_store} ||= []; + return $target; +} + +######################################################### +# _parse_localised_param_name +# this should allow for escaping of the delimiter (TODO) +######################################################### +sub _parse_localised_param_name { + my ($full_param_name) = @_; + + my @a = split /:/, $full_param_name; + my $param_name = pop @a; + my $locality = join q{:}, @a; + + return ($locality, $param_name); } ############################################################################################# @@ -764,11 +1093,7 @@ sub initialise_subst_requests { sub is_valid_name { my ($name, $id) = @_; - if(not $name) { - $logger->($VLMIN, q[No name for element with id ], $id); - } - - if(my $r 
= ref $name) { + if(defined $name and my $r = ref $name) { if($r eq q[ARRAY]) { $logger->($VLMIN, q{Element with id }, $id, q{ has name of type ARRAY ref, it should be a string. Elements: [ }, join(q[;], @$name), q{]}); @@ -837,6 +1162,32 @@ sub find_vtf { $logger->($VLFATAL, q[Failed to find vtf file: ], $vtf_fullname, q[ locally or on template_path: ], join q[:], @$template_path); } +############################################################### +# finalise_cmd: the value of the cmd attribute of an EXEC node +# must be either a string or an array ref of strings (no undef +# elements). Convert an array of strings and array refs to an +# array of strings using splice. +############################################################### +sub finalise_cmd { + my ($cmd) = @_; + + if(ref $cmd eq q[ARRAY]) { + $cmd = [ (grep { defined($_) } @$cmd) ]; # first remove any undefined elements + + for my $i (reverse (0..$#{$cmd})) { + if(not defined $cmd->[$i]) { + splice @{$cmd->[$i]}, $i, 1; + } + elsif(ref $cmd->[$i] eq q[ARRAY]) { + $cmd->[$i] = finalise_cmd($cmd->[$i]); + splice @{$cmd->[$i]}, $i, 1, @{$cmd->[$i]}; + } + } + } + + return $cmd; +} + sub mklogger { my ($verbosity_level, $log, $label) = @_; my $logf; @@ -879,4 +1230,70 @@ sub mklogger { } } +################################################################ +# for storing and reporting handling Error/Warning/Info messages +################################################################ +sub mkewi { + my ($init_label) = @_; + + my @labels = (); # list of strings which make up the message label + if (defined $init_label) { + push @labels, $init_label; + } + my @list = (); # list of messages + + return { + additem => sub { + my ($type, $subclass, @ms) = @_; + + my $label = join(":", @labels); + my $ms = join("", @ms); + + my $full_ms = sprintf "(%s) - %s", $label, $ms; + + push @list, { type => $type, subclass => $subclass, ms => $full_ms }; + + return scalar @list; + }, + addlabel => sub { + my (@label_elements) 
= @_; + + my $label = join("", @label_elements); + + push @labels, $label; + + return $label; + }, + removelabel => sub { + if(@labels > 0) { + pop @labels; + } + + return scalar @labels; + }, + clearlabels => sub { + @labels = (); + + return; + }, + clearitems => sub { + @list = (); + + return; + }, + report => sub { + my ($fatality_level, $logger) = @_; + my $ewi_retstat = 0; + my %ewi_type_names = ( $EWI_ERROR => q[Error], $EWI_WARNING => q[Warning], $EWI_INFO => q[Info], ); + + for my $ewi_item (@list) { + if($ewi_item->{type} == $EWI_ERROR and $ewi_item->{subclass} <= $fatality_level) { $ewi_retstat = 1; } + + $logger->($VLMIN, join("\t", ($ewi_type_names{$ewi_item->{type}}, $ewi_item->{subclass}, $ewi_item->{ms},))); + } + + return $ewi_retstat; + } + } +} From ca8b62cc607e9f2850042d61280956eca79236dc Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 19 Aug 2015 09:28:55 +0100 Subject: [PATCH 04/40] viv.pl - make __IN__ / __OUT__ naming convention for ports mandatory --- bin/viv.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/viv.pl b/bin/viv.pl index f7af14958..c8f0fa73a 100755 --- a/bin/viv.pl +++ b/bin/viv.pl @@ -240,7 +240,8 @@ sub _update_node_data_xfer { #ensure port is connected to in manner suggested by naming convention croak 'Node '.($node->{'id'})." port $port connected as ".($edge_side == $FROM?q("from"):q("to")) if (($inout eq q(OUT))^($edge_side == $FROM)); } else { - $logger->($VLMED, 'Node '.($node->{'id'})." has poorly described port $port (no _{IN,OUT}__ {suf,pre}fix)\n"); +# $logger->($VLMED, 'Node '.($node->{'id'})." has poorly described port $port (no _{IN,OUT}__ {suf,pre}fix)\n"); + croak 'Node '.($node->{'id'})." has poorly described port $port (no _{IN,OUT}__ {suf,pre}fix)\n"; } my $cmd = $node->{'cmd'}; for my$cmd_part ( ref $cmd eq 'ARRAY' ? 
@{$cmd}[1..$#{$cmd}] : ($node->{'cmd'}) ){ From 160da571bdb54212eec85c2c42f06d4edd0fd918 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 19 Aug 2015 09:50:01 +0100 Subject: [PATCH 05/40] add new vtfp.pl tests and test data --- t/10-vtfp-array_expansion.t | 18 ++++++++ t/10-vtfp-param_ring.t | 41 +++++++++++++++++ t/10-vtfp-pv.t | 62 ++++++++++++++++++++++++++ t/10-vtfp-subst_directive.t | 18 ++++++++ t/data/10-vtfp-00.json | 19 ++++++++ t/data/10-vtfp-array_expansion.json | 19 ++++++++ t/data/10-vtfp-param_ring.json | 19 ++++++++ t/data/simple_carping_pipeline1.vtf | 40 ----------------- t/data/simple_failing_io_pipeline1.vtf | 40 ----------------- t/data/simple_failing_io_pipeline2.vtf | 40 ----------------- t/data/simple_failing_pipeline.vtf | 33 -------------- t/data/simple_pipeline.vtf | 40 ----------------- 12 files changed, 196 insertions(+), 193 deletions(-) create mode 100644 t/10-vtfp-array_expansion.t create mode 100644 t/10-vtfp-param_ring.t create mode 100644 t/10-vtfp-pv.t create mode 100644 t/10-vtfp-subst_directive.t create mode 100644 t/data/10-vtfp-00.json create mode 100644 t/data/10-vtfp-array_expansion.json create mode 100644 t/data/10-vtfp-param_ring.json delete mode 100644 t/data/simple_carping_pipeline1.vtf delete mode 100644 t/data/simple_failing_io_pipeline1.vtf delete mode 100644 t/data/simple_failing_io_pipeline2.vtf delete mode 100644 t/data/simple_failing_pipeline.vtf delete mode 100644 t/data/simple_pipeline.vtf diff --git a/t/10-vtfp-array_expansion.t b/t/10-vtfp-array_expansion.t new file mode 100644 index 000000000..14d3411d2 --- /dev/null +++ b/t/10-vtfp-array_expansion.t @@ -0,0 +1,18 @@ +use strict; +use warnings; +use Test::More tests => 1; +use Test::Deep; +use Perl6::Slurp; +use Data::Dumper; +use JSON; + +my $template = q[t/data/10-vtfp-array_expansion.json]; + +{ +# the template contains a set of possibly multivalued parameters p1 - p5 which should expand into a list (array ref) in the cmd attribute of its one node + 
+my $vtfp_results = from_json(slurp "bin/vtfp.pl $template |"); +my $c = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~1A~,q~1B~,q~2A~,q~3A~,q~3B~,q~3C~,q~3D~,q~4A~,q~5A~,q~5B~], type => q~EXEC~, id => q~n1~}]}; +cmp_deeply ($vtfp_results, $c, 'first array element expansion test'); +} + diff --git a/t/10-vtfp-param_ring.t b/t/10-vtfp-param_ring.t new file mode 100644 index 000000000..13e897fab --- /dev/null +++ b/t/10-vtfp-param_ring.t @@ -0,0 +1,41 @@ +use strict; +use warnings; +use Test::More tests => 3; +use Test::Deep; +use Perl6::Slurp; +use Data::Dumper; +use JSON; + +my $template = q[t/data/10-vtfp-param_ring.json]; + +{ +# the template contains a ring of parameter defaults p2->p3->p4->p5->p2. This will lead to an infinite recursion error +# unless the default ring is broken by giving one of the parameters a value (I arbitrarily selected p3). Parameters +# p1 - p4 will appear in the resulting cmd attribute (if they are set). + +my $vtfp_results = from_json(slurp "bin/vtfp.pl -keys p3 -vals break $template |"); +#print "\n\n\n", q[Dump1: ], Dumper($vtfp_results), "\n\n\n"; +my $c = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~one~,q~break~,q~break~,q~break~], type => q~EXEC~, id => q~n1~}]}; +cmp_deeply ($vtfp_results, $c, 'first parameter ring test'); +} + +{ +# this is a slightly more complicated version of the previous test. It sets two parameter values in the ring, and one +# not involved the ring (p1) + +my $vtfp_results = from_json(slurp "bin/vtfp.pl -keys p1,p2,p4 -vals first,second,fourth $template |"); +#print "\n\n\n", q[Dump2: ], Dumper($vtfp_results), "\n\n\n"; +my $c = {edges => [], nodes => [ {cmd => [q~/bin/echo~,q~first~,q~second~,q~fourth~,q~fourth~], type => q~EXEC~, id => q~n1~}]}; +cmp_deeply ($vtfp_results, $c, 'second parameter ring test'); +} + +{ +# another variant of the first previous test. 
It nullifies the first parameter, then sets the value for p5 (in the ring, +# but not referenced directly in the node cmd) to confirm that the value propagates through the defaults to p2, p3, and p4 + +my $vtfp_results = from_json(slurp "bin/vtfp.pl -nullkeys p1 -keys p5 -vals fifth $template |"); +#print "\n\n\n", q[Dump3: ], Dumper($vtfp_results), "\n\n\n"; +my $c = {edges => [], nodes => [ {cmd => [q~/bin/echo~,q~fifth~,q~fifth~,q~fifth~], type => q~EXEC~, id => q~n1~}]}; +cmp_deeply ($vtfp_results, $c, 'third parameter ring test'); +} + diff --git a/t/10-vtfp-pv.t b/t/10-vtfp-pv.t new file mode 100644 index 000000000..bafe6e845 --- /dev/null +++ b/t/10-vtfp-pv.t @@ -0,0 +1,62 @@ +use strict; +use warnings; +use Carp; +use Test::More tests => 3; +use Test::Deep; +use Perl6::Slurp; +use Data::Dumper; +use JSON; + +my $template = q[t/data/10-vtfp-pv.json]; +my $pv_file = q[t/data/10-vtfp-pv.pv]; +my $processed_template = q[t/data/10-vtfp-pv-processed.json]; + +# just export and reimport parameter values for a template +subtest 'pv0' => sub { + plan tests => 3; + + system(qq[bin/vtfp.pl -verbosity_level 0 -o $processed_template -export_param_vals $pv_file $template]) == 0 or croak q[Failed to export params]; + my $pv_data = from_json(slurp $pv_file); + my $expected = {assign_local => {} ,param_store => [], assign => []}; + cmp_deeply ($pv_data, $expected, '(ts1) exported parameter values as expected'); + + my $vtfp_results = from_json(slurp "bin/vtfp.pl -verbosity_level 0 -param_vals $pv_file $template |"); + my $c = from_json(slurp $processed_template); + $expected = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~The~,q~funeral~,q~ends~,q~with~,q~a~,q~mournful~,q~fireworks~,q~display~], type => q~EXEC~, id => q~n1~}]}; + cmp_deeply ($vtfp_results, $c, '(ts1) json config generated using pv file matches original generated config'); + cmp_deeply ($vtfp_results, $expected, '(ts1) json config generated using pv file as expected'); +}; + +# export parameter values from 
a template, overriding default for parameter "subject" from the command-line. Reimport the parameter values. +subtest 'pv1' => sub { + plan tests => 3; + + system(qq[bin/vtfp.pl -verbosity_level 0 -o $processed_template -export_param_vals $pv_file -keys subject,adj -vals party,deafening $template]) == 0 or croak q[Failed to export params]; + my $pv_data = from_json(slurp $pv_file); + my $expected = {assign_local => {} ,param_store => [], assign => [ {subject => q~party~, adj =>q~deafening~, }]}; + cmp_deeply ($pv_data, $expected, '(ts2) exported parameter values as expected'); + + my $vtfp_results = from_json(slurp "bin/vtfp.pl -verbosity_level 0 -param_vals $pv_file $template |"); + my $c = from_json(slurp $processed_template); + $expected = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~The~,q~party~,q~ends~,q~with~,q~a~,q~deafening~,q~fireworks~,q~display~], type => q~EXEC~, id => q~n1~}]}; + cmp_deeply ($vtfp_results, $c, '(ts2) json config generated using pv file matches original generated config'); + cmp_deeply ($vtfp_results, $expected, '(ts2) json config generated using pv file as expected'); +}; + +# export parameter values from a template, overriding defaults for parameters "subject" and "prepobj" from the command-line. Reimport the parameter values. 
+subtest 'pv2' => sub { + plan tests => 3; + + system(qq[bin/vtfp.pl -verbosity_level 0 -o $processed_template -export_param_vals $pv_file -keys subject,prepobj -vals world,whimper -nullkeys adj $template]) == 0 or croak q[Failed to export params]; + my $pv_data = from_json(slurp $pv_file); + my $expected = {assign_local => {} ,param_store => [], assign => [ {subject => q~world~, prepobj => q~whimper~, adj => undef}]}; + cmp_deeply ($pv_data, $expected, '(ts3) exported parameter values as expected'); + + my $vtfp_results = from_json(slurp "bin/vtfp.pl -verbosity_level 0 -param_vals $pv_file $template |"); + my $c = from_json(slurp $processed_template); + $expected = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~The~,q~world~,q~ends~,q~with~,q~a~,q~whimper~], type => q~EXEC~, id => q~n1~}]}; + cmp_deeply ($vtfp_results, $c, '(ts4) json config generated using pv file matches original generated config'); + cmp_deeply ($vtfp_results, $expected, '(ts4) json config generated using pv file as expected'); +}; + +1; diff --git a/t/10-vtfp-subst_directive.t b/t/10-vtfp-subst_directive.t new file mode 100644 index 000000000..14d3411d2 --- /dev/null +++ b/t/10-vtfp-subst_directive.t @@ -0,0 +1,18 @@ +use strict; +use warnings; +use Test::More tests => 1; +use Test::Deep; +use Perl6::Slurp; +use Data::Dumper; +use JSON; + +my $template = q[t/data/10-vtfp-array_expansion.json]; + +{ +# the template contains a set of possibly multivalued parameters p1 - p5 which should expand into a list (array ref) in the cmd attribute of its one node + +my $vtfp_results = from_json(slurp "bin/vtfp.pl $template |"); +my $c = {edges=> [], nodes => [ {cmd => [q~/bin/echo~,q~1A~,q~1B~,q~2A~,q~3A~,q~3B~,q~3C~,q~3D~,q~4A~,q~5A~,q~5B~], type => q~EXEC~, id => q~n1~}]}; +cmp_deeply ($vtfp_results, $c, 'first array element expansion test'); +} + diff --git a/t/data/10-vtfp-00.json b/t/data/10-vtfp-00.json new file mode 100644 index 000000000..0b2e40649 --- /dev/null +++ b/t/data/10-vtfp-00.json @@ 
-0,0 +1,19 @@ +{ +"description":"Test infinite recursion prevention in defaults for parameter resolution. Also provides an example of a parameter taking its default value from another parameter", +"version":"1.0", +"subst_params":[ + { "id": "p1", "default": "one" }, + { "id": "p2", "default": {"subst":"p3"} }, + { "id": "p3", "default": {"subst":"p4"} }, + { "id": "p4", "default": {"subst":"p5"} }, + { "id": "p5", "default": {"subst":"p2"} } +], +"nodes":[ + { + "id":"n1", + "type":"EXEC", + "cmd":[ "echo", {"subst":"p1"}, {"subst":"p2"} ] + } +] +} + diff --git a/t/data/10-vtfp-array_expansion.json b/t/data/10-vtfp-array_expansion.json new file mode 100644 index 000000000..272b9060e --- /dev/null +++ b/t/data/10-vtfp-array_expansion.json @@ -0,0 +1,19 @@ +{ +"description":"Test expansion of parameters, specifically cases where there are multiple instances of multi-valued parameters (arrays)", +"version":"1.0", +"subst_params":[ + { "id": "p1", "subst_constructor":{ "vals":[ "1A", "1B" ] } }, + { "id": "p2", "subst_constructor":{ "vals":[ "2A" ] } }, + { "id": "p3", "subst_constructor":{ "vals":[ "3A", "3B", "3C", "3D" ] } }, + { "id": "p4", "default":"4A"}, + { "id": "p5", "subst_constructor":{ "vals":[ "5A", "5B" ] } } +], +"nodes":[ + { + "id":"n1", + "type":"EXEC", + "cmd":[ "echo", {"subst":"p1"}, {"subst":"p2"}, {"subst":"p3"}, {"subst":"p4"}, {"subst":"p5"} ] + } +] +} + diff --git a/t/data/10-vtfp-param_ring.json b/t/data/10-vtfp-param_ring.json new file mode 100644 index 000000000..189bd9c9d --- /dev/null +++ b/t/data/10-vtfp-param_ring.json @@ -0,0 +1,19 @@ +{ +"description":"Test infinite recursion prevention in defaults for parameter resolution. 
Also provides an example of a parameter taking its default value from another parameter", +"version":"1.0", +"subst_params":[ + { "id": "p1", "default": "one" }, + { "id": "p2", "default": {"subst":"p3"} }, + { "id": "p3", "default": {"subst":"p4"} }, + { "id": "p4", "default": {"subst":"p5"} }, + { "id": "p5", "default": {"subst":"p2"} } +], +"nodes":[ + { + "id":"n1", + "type":"EXEC", + "cmd":[ "echo", {"subst":"p1"}, {"subst":"p2"}, {"subst":"p3"}, {"subst":"p4"} ] + } +] +} + diff --git a/t/data/simple_carping_pipeline1.vtf b/t/data/simple_carping_pipeline1.vtf deleted file mode 100644 index 28915b833..000000000 --- a/t/data/simple_carping_pipeline1.vtf +++ /dev/null @@ -1,40 +0,0 @@ -{ - "description": "minimal carping test pipeline - works, but I/O port names are poorly named. See log for warning", - "nodes":[ - { "id": "n1", - "type": "EXEC", - "cmd": "echo stuff", - "use_STDIN": 0, - "use_STDOUT": 1 - }, - { "id": "n2", - "type": "EXEC", - "cmd": ["cat"], - "use_STDIN": 1, - "use_STDOUT": 1 - }, - { "id": "m", - "type": "EXEC", - "cmd": ["echo", "stuff"], - "use_STDIN": false, - "use_STDOUT": true - }, - { "id": "d", - "type": "EXEC", - "cmd": "diff __IN1__ __IN2__", - "use_STDIN": 0, - "use_STDOUT": 0 - } - ], - "edges":[ - { "from": "n1", - "to": "n2" - }, - { "from": "n2", - "to": "d:__IN1__" - }, - { "from": "m", - "to": "d:__IN2__" - } - ] -} diff --git a/t/data/simple_failing_io_pipeline1.vtf b/t/data/simple_failing_io_pipeline1.vtf deleted file mode 100644 index 1b946d304..000000000 --- a/t/data/simple_failing_io_pipeline1.vtf +++ /dev/null @@ -1,40 +0,0 @@ -{ - "description": "minimal failing test pipeline. Port name in edge does not match one in node definition. 
Also carps about poorly named __IN1__ and __INN2__ ports", - "nodes":[ - { "id": "n1", - "type": "EXEC", - "cmd": "echo stuff", - "use_STDIN": 0, - "use_STDOUT": 1 - }, - { "id": "n2", - "type": "EXEC", - "cmd": ["cat"], - "use_STDIN": 1, - "use_STDOUT": 1 - }, - { "id": "m", - "type": "EXEC", - "cmd": ["echo", "stuff"], - "use_STDIN": false, - "use_STDOUT": true - }, - { "id": "d", - "type": "EXEC", - "cmd": "diff __IN1__ __IN2__", - "use_STDIN": 0, - "use_STDOUT": 0 - } - ], - "edges":[ - { "from": "n1", - "to": "n2" - }, - { "from": "n2", - "to": "d:__IN1__" - }, - { "from": "m", - "to": "d:__INN2__" - } - ] -} diff --git a/t/data/simple_failing_io_pipeline2.vtf b/t/data/simple_failing_io_pipeline2.vtf deleted file mode 100644 index 91b9b683f..000000000 --- a/t/data/simple_failing_io_pipeline2.vtf +++ /dev/null @@ -1,40 +0,0 @@ -{ - "description": "minimal failing test pipeline. Port names in edges match the node definitions, but are of the wrong type", - "nodes":[ - { "id": "n1", - "type": "EXEC", - "cmd": "echo stuff", - "use_STDIN": 0, - "use_STDOUT": 1 - }, - { "id": "n2", - "type": "EXEC", - "cmd": ["cat"], - "use_STDIN": 1, - "use_STDOUT": 1 - }, - { "id": "m", - "type": "EXEC", - "cmd": ["echo", "stuff"], - "use_STDIN": false, - "use_STDOUT": true - }, - { "id": "d", - "type": "EXEC", - "cmd": "diff __IN_1__ __OUT_2__", - "use_STDIN": 0, - "use_STDOUT": 0 - } - ], - "edges":[ - { "from": "n1", - "to": "n2" - }, - { "from": "n2", - "to": "d:__IN_1__" - }, - { "from": "m", - "to": "d:__OUT_2__" - } - ] -} diff --git a/t/data/simple_failing_pipeline.vtf b/t/data/simple_failing_pipeline.vtf deleted file mode 100644 index dac3cb42d..000000000 --- a/t/data/simple_failing_pipeline.vtf +++ /dev/null @@ -1,33 +0,0 @@ -{ - "description": "minimal failing test pipeline. Fails because of false in cmd. 
Also carps about poorly named __IN1__ and __IN2__ ports", - "nodes":[ - { "id": "n1", - "type": "EXEC", - "cmd": "echo stuff; sleep 1 && cat" - }, - { "id": "n2", - "type": "EXEC", - "ocmd": "cat; false", - "cmd": "head -1; false" - }, - { "id": "m", - "type": "EXEC", - "cmd": ["echo", "stuff"] - }, - { "id": "d", - "type": "EXEC", - "cmd": "diff __IN1__ __IN2__" - } - ], - "edges":[ - { "from": "n1", - "to": "n2" - }, - { "from": "n2", - "to": "d:__IN1__" - }, - { "from": "m", - "to": "d:__IN2__" - } - ] -} diff --git a/t/data/simple_pipeline.vtf b/t/data/simple_pipeline.vtf deleted file mode 100644 index 631f2835d..000000000 --- a/t/data/simple_pipeline.vtf +++ /dev/null @@ -1,40 +0,0 @@ -{ - "description": "minimal test pipeline", - "nodes":[ - { "id": "n1", - "type": "EXEC", - "cmd": "echo stuff", - "use_STDIN": 0, - "use_STDOUT": 1 - }, - { "id": "n2", - "type": "EXEC", - "cmd": ["cat"], - "use_STDIN": 1, - "use_STDOUT": 1 - }, - { "id": "m", - "type": "EXEC", - "cmd": ["echo", "stuff"], - "use_STDIN": false, - "use_STDOUT": true - }, - { "id": "d", - "type": "EXEC", - "cmd": "diff __IN_1__ __IN_2__", - "use_STDIN": 0, - "use_STDOUT": 0 - } - ], - "edges":[ - { "from": "n1", - "to": "n2" - }, - { "from": "n2", - "to": "d:__IN_1__" - }, - { "from": "m", - "to": "d:__IN_2__" - } - ] -} From 394c2b278e40d4499d306130550012c22f7e2e78 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 19 Aug 2015 09:50:55 +0100 Subject: [PATCH 06/40] amended templates - observe port naming convention; move some cmds from subst_val to nodes sections for readability --- ...ge2_humansplit_notargetalign_template.json | 58 ++-- ...nment_wtsi_stage2_humansplit_template.json | 57 ++-- .../vtlib/alignment_wtsi_stage2_template.json | 31 +- data/vtlib/auxmerge_prep.json | 8 +- data/vtlib/auxmerge_prep_realign.json | 10 +- ...2bam_phix_deplex_wtsi_stage1_template.json | 44 +-- data/vtlib/bwa_aln_alignment.json | 42 +-- data/vtlib/bwa_aln_se_alignment.json | 22 +- 
data/vtlib/bwa_mem_alignment.json | 28 +- data/vtlib/final_output_noalign_prep.json | 201 ++--------- data/vtlib/final_output_prep.json | 286 ++-------------- data/vtlib/merge_aligned.json | 318 +++++++----------- data/vtlib/merge_final_output_prep.json | 38 ++- data/vtlib/post_alignment.json | 84 +---- data/vtlib/pre_alignment.json | 18 +- data/vtlib/pre_alignment_realign.json | 18 +- data/vtlib/realignment_wtsi_template.json | 6 +- data/vtlib/seqchksum.json | 20 +- data/vtlib/seqchksum_hs.json | 13 +- data/vtlib/seqchksum_realign.json | 12 +- data/vtlib/tophat2_alignment.json | 254 +++----------- 21 files changed, 426 insertions(+), 1142 deletions(-) diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json index 44bebac77..ff7c6456e 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json @@ -58,8 +58,7 @@ "id":"post_alignment_hs", "required":"yes", "subst_constructor":{ - "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"post_alignment_hs_name"}, ".json" - ], + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"post_alignment_hs_name"}, ".json" ], "postproc":{"op":"concat", "pad":""} } }, @@ -115,25 +114,6 @@ "postproc":{"op":"concat","pad":""} } }, - { - "id":"alignment_filter_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"java_cmd"}, - "-Xmx1000m", - "-jar", {"subst":"alignment_filter_jar"}, - "VALIDATION_STRINGENCY=SILENT", - "CREATE_MD5_FILE=false", - "VERBOSITY=INFO", - "QUIET=false", - "COMPRESSION_LEVEL=5", - "MAX_RECORDS_IN_RAM=500000", - "CREATE_INDEX=false" - ], - "postproc":{"op":"noconcat", "pad":" "} - } - }, {"id":"af_metrics_name","required":"no","default":"alignment_filter_metrics.json"}, { "id":"af_metrics", @@ -210,7 +190,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcollate2", "collate=1", 
"level=0"], + "cmd":[ "bamcollate2", "collate=1", "level=0" ], "comment":"ensure BAM records are gathered by template i.e. queryname" }, { @@ -219,7 +199,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] }, { "id":"pre_alignment_hs", @@ -262,7 +242,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamrecompress verbose=0 numthreads=2" + "cmd":[ "bamrecompress", "verbose=0", "numthreads=2" ] }, { "id":"initial_phix_aln_bam", @@ -281,7 +261,25 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS__"] + "orig_cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS__"], + "cmd": [ + {"subst":"java_cmd"}, + "-Xmx1000m", + "-jar", {"subst":"alignment_filter_jar"}, + "VALIDATION_STRINGENCY=SILENT", + "CREATE_MD5_FILE=false", + "VERBOSITY=INFO", + "QUIET=false", + "COMPRESSION_LEVEL=5", + "MAX_RECORDS_IN_RAM=500000", + "CREATE_INDEX=false", + "IN=__PHIX_BAM_IN__", + "IN=__HUMAN_SPLIT_BAM_IN__", + "OUT=__PHIX_BAM_OUT__", + "OUT=__HUMAN_SPLIT_BAM_OUT__", + "UNALIGNED=/dev/stdout", + "METRICS_FILE=__AF_METRICS_OUT__" + ] }, { "id":"af_metrics", @@ -339,12 +337,12 @@ { "id":"hsref_to_alignment", "from":"hs_alignment_reference_genome", "to":"alignment_hs:reference" }, { "id":"alignment_hs_to_post_alignment_hs", "from":"alignment_hs", 
"to":"post_alignment_hs" }, { "id":"reference_dict_hs_to_post_alignment", "from":"reference_dict_hs", "to":"post_alignment_hs:reference_dict" }, - { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_INBAM__" }, - { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_INBAM__" }, - { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, + { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_BAM_IN__" }, + { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_BAM_IN__" }, + { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS_OUT__", "to":"af_metrics" }, { "id":"af_to_fopt", "from":"alignment_filter", "to":"final_output_prep_target" }, - { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, - { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_OUTBAM__", "to":"final_output_prep_hs" }, + { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_BAM_OUT__", "to":"final_output_prep_phix" }, + { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_BAM_OUT__", "to":"final_output_prep_hs" }, { "id":"src_bam_to_seqchksum", "from":"src_bam", "to":"seqchksum" }, { "id":"fopt_to_bam", "from":"final_output_prep_target", "to":"seqchksum:target_seqchksum" }, { "id":"fopp_to_bam_phix", "from":"final_output_prep_phix", "to":"seqchksum:phix_seqchksum" }, diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_template.json index ec51450a3..6dcdbfa3c 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_template.json @@ -150,25 +150,6 @@ "postproc":{"op":"concat","pad":""} } }, - { - "id":"alignment_filter_cmd", - "required":"yes", - "subst_constructor":{ - 
"vals":[ - {"subst":"java_cmd"}, - "-Xmx1000m", - "-jar", {"subst":"alignment_filter_jar"}, - "VALIDATION_STRINGENCY=SILENT", - "CREATE_MD5_FILE=false", - "VERBOSITY=INFO", - "QUIET=false", - "COMPRESSION_LEVEL=5", - "MAX_RECORDS_IN_RAM=500000", - "CREATE_INDEX=false" - ], - "postproc":{"op":"noconcat", "pad":" "} - } - }, {"id":"af_metrics_name","required":"no","default":"alignment_filter_metrics.json"}, { "id":"af_metrics", @@ -245,7 +226,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcollate2", "collate=1", "level=0"], + "cmd":[ "bamcollate2", "collate=1", "level=0" ], "comment":"ensure BAM records are gathered by template i.e. queryname" }, { @@ -254,7 +235,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] }, { "id":"pre_alignment_target", @@ -332,7 +313,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamrecompress verbose=0 numthreads=2" + "cmd":[ "bamrecompress", "verbose=0", "numthreads=2" ] }, { "id":"initial_phix_aln_bam", @@ -361,7 +342,25 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "IN=__TARGET_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "OUT=/dev/stdout", "METRICS_FILE=__AF_METRICS__"] + "cmd": [ + {"subst":"java_cmd"}, + "-Xmx1000m", + "-jar", {"subst":"alignment_filter_jar"}, + "VALIDATION_STRINGENCY=SILENT", + "CREATE_MD5_FILE=false", + "VERBOSITY=INFO", + "QUIET=false", + "COMPRESSION_LEVEL=5", + "MAX_RECORDS_IN_RAM=500000", + "CREATE_INDEX=false", + 
"IN=__PHIX_BAM_IN__", + "IN=__HUMAN_SPLIT_BAM_IN__", + "IN=__TARGET_BAM_IN__", + "OUT=__PHIX_BAM_OUT__", + "OUT=__HUMAN_SPLIT_BAM_OUT__", + "OUT=/dev/stdout", + "METRICS_FILE=__AF_METRICS_OUT__" + ] }, { "id":"af_metrics", @@ -421,18 +420,18 @@ { "id":"ref_to_alignment", "from":"alignment_reference_genome", "to":"alignment_target:reference" }, { "id":"alignment_target_to_post_alignment_target", "from":"alignment_target", "to":"post_alignment_target" }, { "id":"reference_dict_to_post_alignment", "from":"reference_dict", "to":"post_alignment_target:reference_dict" }, - { "id":"post_alignment_to_alignment_filter", "from":"post_alignment_target", "to":"alignment_filter:__TARGET_INBAM__" }, + { "id":"post_alignment_to_alignment_filter", "from":"post_alignment_target", "to":"alignment_filter:__TARGET_BAM_IN__" }, { "id":"t0_to_prealnhs", "from":"tee0:__HUMAN_SPLIT_OUT__", "to":"pre_alignment_hs" }, { "id":"prealnhs_to_alnhs", "from":"pre_alignment_hs", "to":"alignment_hs" }, { "id":"hsref_to_alignment", "from":"hs_alignment_reference_genome", "to":"alignment_hs:reference" }, { "id":"alignment_hs_to_post_alignment_hs", "from":"alignment_hs", "to":"post_alignment_hs" }, { "id":"reference_dict_hs_to_post_alignment", "from":"reference_dict_hs", "to":"post_alignment_hs:reference_dict" }, - { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_INBAM__" }, - { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_INBAM__" }, - { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, + { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_BAM_IN__" }, + { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_BAM_IN__" }, + { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS_OUT__", "to":"af_metrics" }, { "id":"af_to_fopt", 
"from":"alignment_filter", "to":"final_output_prep_target" }, - { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, - { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_OUTBAM__", "to":"final_output_prep_hs" }, + { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_BAM_OUT__", "to":"final_output_prep_phix" }, + { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_BAM_OUT__", "to":"final_output_prep_hs" }, { "id":"src_bam_to_seqchksum", "from":"src_bam", "to":"seqchksum" }, { "id":"fopt_to_bam", "from":"final_output_prep_target", "to":"seqchksum:target_seqchksum" }, { "id":"fopp_to_bam_phix", "from":"final_output_prep_phix", "to":"seqchksum:phix_seqchksum" }, diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 81c2ee2f5..557195829 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -190,7 +190,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ] }, { "id":"pre_alignment_target", @@ -232,7 +232,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamrecompress verbose=0 numthreads=2" + "cmd":[ "bamrecompress", "verbose=0", "numthreads=2" ] }, { "id":"initial_phix_aln_bam", @@ -251,7 +251,24 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__TARGET_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=/dev/stdout", "METRICS_FILE=__AF_METRICS__"] + "orig_cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_BAM_IN__", 
"IN=__TARGET_BAM_IN__", "OUT=__PHIX_BAM_OUT__", "OUT=/dev/stdout", "METRICS_FILE=__AF_METRICS_OUT__"], + "cmd": [ + {"subst":"java_cmd"}, + "-Xmx1000m", + "-jar", {"subst":"alignment_filter_jar"}, + "VALIDATION_STRINGENCY=SILENT", + "CREATE_MD5_FILE=false", + "VERBOSITY=INFO", + "QUIET=false", + "COMPRESSION_LEVEL=5", + "MAX_RECORDS_IN_RAM=500000", + "CREATE_INDEX=false", + "IN=__PHIX_BAM_IN__", + "IN=__TARGET_BAM_IN__", + "OUT=__PHIX_BAM_OUT__", + "OUT=/dev/stdout", + "METRICS_FILE=__AF_METRICS_OUT__" + ] }, { "id":"af_metrics", @@ -297,11 +314,11 @@ { "id":"ref_to_alignment", "from":"alignment_reference_genome", "to":"alignment_target:reference" }, { "id":"alignment_target_to_post_alignment_target", "from":"alignment_target", "to":"post_alignment_target" }, { "id":"reference_dict_to_post_alignment", "from":"reference_dict", "to":"post_alignment_target:reference_dict" }, - { "id":"post_alignment_to_alignment_filter", "from":"post_alignment_target", "to":"alignment_filter:__TARGET_INBAM__" }, - { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_INBAM__" }, - { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, + { "id":"post_alignment_to_alignment_filter", "from":"post_alignment_target", "to":"alignment_filter:__TARGET_BAM_IN__" }, + { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_BAM_IN__" }, + { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS_OUT__", "to":"af_metrics" }, { "id":"af_to_paf_target", "from":"alignment_filter", "to":"final_output_prep_target" }, - { "id":"af_to_paf_phix", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, + { "id":"af_to_paf_phix", "from":"alignment_filter:__PHIX_BAM_OUT__", "to":"final_output_prep_phix" }, { "id":"src_bam_to_seqchksum", "from":"src_bam", "to":"seqchksum" }, { "id":"fopt_to_bam", "from":"final_output_prep_target", 
"to":"seqchksum:target_seqchksum" }, { "id":"fopp_to_bam_phix", "from":"final_output_prep_phix", "to":"seqchksum:phix_seqchksum" } diff --git a/data/vtlib/auxmerge_prep.json b/data/vtlib/auxmerge_prep.json index da3f14b97..ce5c66b6c 100644 --- a/data/vtlib/auxmerge_prep.json +++ b/data/vtlib/auxmerge_prep.json @@ -3,12 +3,8 @@ "description":"preprocessing of non/phix-aligned bam before merging back auxiliary tags using bam12auxmerge", "subgraph_io":{ "ports":{ - "inputs":{ - "_stdin_":"bamreset_pre_auxmerge" - }, - "outputs":{ - "_stdout_":"bamadapterclip_pre_auxmerge" - } + "inputs":{ "_stdin_":"bamreset_pre_auxmerge" }, + "outputs":{ "_stdout_":"bamadapterclip_pre_auxmerge" } } }, "nodes":[ diff --git a/data/vtlib/auxmerge_prep_realign.json b/data/vtlib/auxmerge_prep_realign.json index b6b9a361a..b4c584e3b 100644 --- a/data/vtlib/auxmerge_prep_realign.json +++ b/data/vtlib/auxmerge_prep_realign.json @@ -17,7 +17,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamreset resetaux=0 auxfilter=RG,PG,BC,RT,QT,tr,tq,br,qr level=0 verbose=0", + "cmd":[ "bamreset", "resetaux=0", "auxfilter=RG,PG,BC,RT,QT,tr,tq,br,qr", "level=0", "verbose=0" ], "comment":"bam12auxmerge <= 0.0.142 requires SQ headers removed. 
Alignment removal also required for bamadapterclip (at least 0.0.142)" }, { @@ -25,14 +25,10 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamadapterfind clip=1 verbose=0 level=0" + "cmd":[ "bamadapterfind", "clip=1", "verbose=0", "level=0" ] } ], "edges":[ - { - "id":"bamreset_to_bamadapterclip", - "from":"bamreset_pre_auxmerge", - "to":"bamadapterclip_pre_auxmerge" - } + { "id":"bamreset_to_bamadapterclip", "from":"bamreset_pre_auxmerge", "to":"bamadapterclip_pre_auxmerge" } ] } diff --git a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json index 3f7fa1751..9355a1775 100644 --- a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +++ b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json @@ -163,7 +163,7 @@ "-jar",{"subst":"bamindexdecoder_jar"}, "I=/dev/stdin", "O=/dev/stdout", - "M=__METRICS_FILE__", + "M=__METRICS_FILE_OUT__", {"subst":"barcode_file_flag"} ], "postproc":{"op":"pack","pad":" "} @@ -277,11 +277,7 @@ { "id":"bamadapterfind", "type":"EXEC", - "cmd":[ - "bamadapterfind", - "md5=1", - {"subst":"bamadapterfind_md5file_flag"} - ], + "cmd":[ "bamadapterfind", "md5=1", {"subst":"bamadapterfind_md5file_flag"} ], "description":"Find and mark the adaptors used by Illumina" }, { @@ -293,7 +289,7 @@ "-jar", {"subst":"bamindexdecoder_jar"}, "I=/dev/stdin", "O=/dev/stdout", - "M=__METRICS_FILE__", + "M=__METRICS_FILE_OUT__", {"subst":"barcode_file_flag"} ] }, @@ -305,12 +301,12 @@ { "id":"bamcollate", "type":"EXEC", - "cmd":["bamcollate2", "collate=2", "level=0"] + "cmd":[ "bamcollate2", "collate=2", "level=0" ] }, { "id":"tee_decode", "type":"EXEC", - "cmd":[ "teepot", "-v", "-m", "2M", "__TD1__", "__TD2__" ] + "cmd":[ "teepot", "-v", "-m", "2M", "__TD1_OUT__", "__TD2_OUT__" ] }, { "id":"simple_cat", @@ -336,12 +332,12 @@ { "id":"bammerge", "type":"EXEC", - "cmd":[ "bam12auxmerge", "level=0", "rankstrip=1", "ranksplit=0", "zztoname=0", "clipreinsert=1", 
"__PREALN_BAM__" ] + "cmd":[ "bam12auxmerge", "level=0", "rankstrip=1", "ranksplit=0", "zztoname=0", "clipreinsert=1", "__PREALN_BAM_IN__" ] }, { "id":"prefilter", "type":"EXEC", - "cmd":[ "teepot", "-v", "-t", ".", "-m", "2M", "__PF1__", "__PF2__" ] + "cmd":[ "teepot", "-v", "-t", ".", "-m", "2M", "__PF1_OUT__", "__PF2_OUT__" ] }, { "id":"cat2", @@ -366,7 +362,7 @@ { "id":"apply_filter", "type":"EXEC", - "cmd":[ "spatial_filter", "-a", "-f", "-q", "-F __FILTER__", "/dev/stdin" ], + "cmd":[ "spatial_filter", "-a", "-f", "-q", "-F __FILTER_IN__", "/dev/stdin" ], "description":"Apply a spatial filter" }, { @@ -383,13 +379,7 @@ { "id":"splitter", "type":"EXEC", - "cmd":[ - {"subst":"samtools_executable"}, - "split", - "-f", - {"subst":"split_format"}, - "-" - ], + "cmd":[ {"subst":"samtools_executable"}, "split", "-f", {"subst":"split_format"}, "-" ], "description":"Split the BAM file into separate BAM files by Read Group" } ], @@ -398,17 +388,17 @@ { "id":"bamadapterfind_to_decoder", "from":"bamadapterfind", "to":"bamindexdecoder" }, { "id":"bamindexdecoder_to_collate", "from":"bamindexdecoder", "to":"bamcollate" }, { "id":"collate_to_tee", "from":"bamcollate", "to":"tee_decode" }, - { "id":"decoder_to_metrics", "from":"bamindexdecoder:__METRICS_FILE__", "to":"decoder_metrics" }, - { "id":"tee_decode_to_cat", "from":"tee_decode:__TD1__", "to":"simple_cat" }, - { "id":"cat_to_merge", "from":"simple_cat", "to":"bammerge:__PREALN_BAM__" }, - { "id":"tee_decode_to_alignment", "from":"tee_decode:__TD2__", "old_to":"bam2fastq", "to":"alignment_phix" }, - { "id":"ref_to_alignment", "from":"reference_phix", "orig_to":"bwa_mem:__REFERENCE_PHIX__", "to":"alignment_phix:reference" }, + { "id":"decoder_to_metrics", "from":"bamindexdecoder:__METRICS_FILE_OUT__", "to":"decoder_metrics" }, + { "id":"tee_decode_to_cat", "from":"tee_decode:__TD1_OUT__", "to":"simple_cat" }, + { "id":"cat_to_merge", "from":"simple_cat", "to":"bammerge:__PREALN_BAM_IN__" }, + { 
"id":"tee_decode_to_alignment", "from":"tee_decode:__TD2_OUT__", "old_to":"bam2fastq", "to":"alignment_phix" }, + { "id":"ref_to_alignment", "from":"reference_phix", "to":"alignment_phix:reference" }, { "id":"alignment_to_merge", "from":"alignment_phix", "to":"bammerge" }, { "id":"merge_to_prefilter", "from":"bammerge", "to":"prefilter" }, - { "id":"prefilter_to_create_filter", "from":"prefilter:__PF1__", "to":"create_filter" }, + { "id":"prefilter_to_create_filter", "from":"prefilter:__PF1_OUT__", "to":"create_filter" }, { "id":"create_filter_to_filter_file", "from":"create_filter", "to":"spatial_filter_file" }, - { "id":"filter_file_to_apply_filter", "from":"spatial_filter_file", "to":"apply_filter:__FILTER__" }, - { "id":"prefilter_to_tmp_bam", "from":"prefilter:__PF2__", "to":"pre_spatial_filter_bam" }, + { "id":"filter_file_to_apply_filter", "from":"spatial_filter_file", "to":"apply_filter:__FILTER_IN__" }, + { "id":"prefilter_to_tmp_bam", "from":"prefilter:__PF2_OUT__", "to":"pre_spatial_filter_bam" }, { "id":"tmp_bam_to_apply", "from":"pre_spatial_filter_bam", "to":"apply_filter" }, { "id":"apply_filter_to_tee", "from":"apply_filter", "to":"tee_split" }, { "id":"tee_to_filtered_bam", "from":"tee_split:__FILTERED_BAM_OUT__", "to":"filtered_bam" }, diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index e15077468..ed482179c 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -5,7 +5,7 @@ "ports":{ "inputs":{ "_stdin_":"tee4", - "reference":["bwa_aln_1:__REFERENCE_GENOME_FASTA__", "bwa_aln_2:__REFERENCE_GENOME_FASTA__", "bwa_sampe:__REFERENCE_GENOME_FASTA__"] + "reference":["bwa_aln_1:__REFERENCE_GENOME_FASTA_IN__", "bwa_aln_2:__REFERENCE_GENOME_FASTA_IN__", "bwa_sampe:__REFERENCE_GENOME_FASTA_IN__"] }, "outputs":{ "_stdout_":"samtobam" @@ -25,33 +25,17 @@ { "id":"tee4", "type":"EXEC", - "cmd":["teepot", "-v", "-w", "300", "-m", "1G", "__ALN_1_OUT__", "__ALN_2_OUT__", "__SAMPE_1_OUT__", 
"__SAMPE_2_OUT__" ] + "cmd":[ "teepot", "-v", "-w", "300", "-m", "1G", "__ALN_1_OUT__", "__ALN_2_OUT__", "__SAMPE_1_OUT__", "__SAMPE_2_OUT__" ] }, { "id":"bwa_aln_1", "type":"EXEC", - "cmd":[ - {"subst":"bwa_executable"}, - "aln", - "-t", - {"subst":"aligner_numthreads"}, - "-b1", - "__REFERENCE_GENOME_FASTA__", - "__BAM_IN__" - ] + "cmd":[ {"subst":"bwa_executable"}, "aln", "-t", {"subst":"aligner_numthreads"}, "-b1", "__REFERENCE_GENOME_FASTA_IN__", "__BAM_IN__" ] }, { "id":"bwa_aln_2", "type":"EXEC", - "cmd":[ - {"subst":"bwa_executable"}, - "aln", - "-t", - {"subst":"aligner_numthreads"}, - "-b2", - "__REFERENCE_GENOME_FASTA__", - "__BAM_IN__" - ] + "cmd":[ {"subst":"bwa_executable"}, "aln", "-t", {"subst":"aligner_numthreads"}, "-b2", "__REFERENCE_GENOME_FASTA_IN__", "__BAM_IN__" ] }, { "id":"simple_cat1", @@ -70,15 +54,7 @@ { "id":"bwa_sampe", "type":"EXEC", - "cmd":[ - {"subst":"bwa_executable"}, - "sampe", - "__REFERENCE_GENOME_FASTA__", - "__SAI_1__", - "__SAI_2__", - "__BAM_1__", - "__BAM_2__" - ] + "cmd":[ {"subst":"bwa_executable"}, "sampe", "__REFERENCE_GENOME_FASTA__", "__SAI_1_IN__", "__SAI_2_IN__", "__BAM_1_IN__", "__BAM_2_IN__" ] }, { "id":"samtobam", @@ -90,11 +66,11 @@ { "id":"bwa_aln_bam1", "from":"tee4:__ALN_1_OUT__", "to":"bwa_aln_1:__BAM_IN__" }, { "id":"bwa_aln_bam2", "from":"tee4:__ALN_2_OUT__", "to":"bwa_aln_2:__BAM_IN__" }, { "id":"tee_to_cat1", "from":"tee4:__SAMPE_1_OUT__", "to":"simple_cat1" }, - { "id":"cat1_to_sampe1", "from":"simple_cat1", "to":"bwa_sampe:__BAM_1__" }, + { "id":"cat1_to_sampe1", "from":"simple_cat1", "to":"bwa_sampe:__BAM_1_IN__" }, { "id":"tee_to_cat2", "from":"tee4:__SAMPE_2_OUT__", "to":"simple_cat2" }, - { "id":"cat1_to_sampe2", "from":"simple_cat2", "to":"bwa_sampe:__BAM_2__" }, - { "id":"aln1_sai_to_sampe", "from":"bwa_aln_1", "to":"bwa_sampe:__SAI_1__" }, - { "id":"aln2_sai_to_sampe", "from":"bwa_aln_2", "to":"bwa_sampe:__SAI_2__" }, + { "id":"cat1_to_sampe2", "from":"simple_cat2", 
"to":"bwa_sampe:__BAM_2_IN__" }, + { "id":"aln1_sai_to_sampe", "from":"bwa_aln_1", "to":"bwa_sampe:__SAI_1_IN__" }, + { "id":"aln2_sai_to_sampe", "from":"bwa_aln_2", "to":"bwa_sampe:__SAI_2_IN__" }, { "id":"bwa_sampe_to_scramble", "from":"bwa_sampe", "to":"samtobam" } ] } diff --git a/data/vtlib/bwa_aln_se_alignment.json b/data/vtlib/bwa_aln_se_alignment.json index f525c8a1a..008613f91 100644 --- a/data/vtlib/bwa_aln_se_alignment.json +++ b/data/vtlib/bwa_aln_se_alignment.json @@ -5,7 +5,7 @@ "ports":{ "inputs":{ "_stdin_":"tee2", - "reference":["bwa_aln:__REFERENCE_GENOME_FASTA__", "bwa_samse:__REFERENCE_GENOME_FASTA__"] + "reference":["bwa_aln:__REFERENCE_GENOME_FASTA_IN__", "bwa_samse:__REFERENCE_GENOME_FASTA_IN__"] }, "outputs":{ "_stdout_":"samtobam" @@ -25,20 +25,12 @@ { "id":"tee2", "type":"EXEC", - "cmd":["teepot", "-v", "-w", "300", "-m", "1G", "__ALN_OUT__", "__SAMPE_OUT__" ] + "cmd":[ "teepot", "-v", "-w", "300", "-m", "1G", "__ALN_OUT__", "__SAMPE_OUT__" ] }, { "id":"bwa_aln", "type":"EXEC", - "cmd":[ - {"subst":"bwa_executable"}, - "aln", - "-t", - {"subst":"aligner_numthreads"}, - "-b", - "__REFERENCE_GENOME_FASTA__", - "__BAM_IN__" - ] + "cmd":[ {"subst":"bwa_executable"}, "aln", "-t", {"subst":"aligner_numthreads"}, "-b", "__REFERENCE_GENOME_FASTA_IN__", "__BAM_IN__" ] }, { "id":"simple_cat", @@ -50,13 +42,7 @@ { "id":"bwa_samse", "type":"EXEC", - "cmd":[ - {"subst":"bwa_executable"}, - "samse", - "__REFERENCE_GENOME_FASTA__", - "__SAI_IN__", - "__BAM_IN__" - ] + "cmd":[ {"subst":"bwa_executable"}, "samse", "__REFERENCE_GENOME_FASTA_IN__", "__SAI_IN__", "__BAM_IN__" ] }, { "id":"samtobam", diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index 576952bfd..52dad88b5 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -5,7 +5,7 @@ "ports":{ "inputs":{ "_stdin_":"bamtofastq", - "reference":"bwa_mem:__DB_PREFIX_REFERENCE_GENOME__" + 
"reference":"bwa_mem:__DB_PREFIX_REFERENCE_GENOME_IN__" }, "outputs":{ "_stdout_":"samtobam" @@ -24,29 +24,9 @@ { "id":"bwa_mem_T_flag", "required":"no", - "subst_constructor":{ - "vals":[ - "-T", - {"subst":"bwa_mem_T_value"} - ] - } + "subst_constructor":{ "vals":[ "-T", {"subst":"bwa_mem_T_value"} ] } }, - {"id":"bwa_mem_p_flag","required":"no","default":"-p","comment":"by default, paired alignment is assumed"}, - { - "id":"bwa_mem_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"bwa_executable"}, - "mem", - "-t", - {"subst":"aligner_numthreads"}, - {"subst":"bwa_mem_p_flag"}, - {"subst":"bwa_mem_T_flag"} - ], - "postproc":{"op":"pack","pad":" "} - } - } + {"id":"bwa_mem_p_flag","required":"no","default":"-p","comment":"by default, paired alignment is assumed"} ], "nodes":[ { @@ -58,7 +38,7 @@ "id":"bwa_mem", "comment":"presuming interleaved FR fastq records (-p flag), output all records (-T 0)", "type":"EXEC", - "cmd":[{"subst":"bwa_mem_cmd"}, "__DB_PREFIX_REFERENCE_GENOME__", "__FQ_IN__"] + "cmd":[ {"subst":"bwa_executable"}, "mem", "-t", {"subst":"aligner_numthreads"}, {"subst":"bwa_mem_p_flag"}, {"subst":"bwa_mem_T_flag"}, "__DB_PREFIX_REFERENCE_GENOME_IN__", "__FQ_IN__" ] }, { "id":"samtobam", diff --git a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index fd88202c1..2e5f6a448 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ b/data/vtlib/final_output_noalign_prep.json @@ -18,33 +18,10 @@ "id":"bs_tmpfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"bstmp"}, - "_", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".tmp" - ], + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"bstmp"}, "_", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".tmp" ], "postproc":{"op":"concat", "pad":""} } }, - { - "id":"scramble_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"scramble_executable"}, - 
"-I", - "bam", - "-O", - "cram" - ], - "postproc":{"op":"pack"} - } - }, { "id":"bmd_tmpfile_flag", "required":"no", @@ -70,88 +47,22 @@ } }, {"id":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, - { - "id":"bammarkduplicates", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"bmd_cmd"}, - "level=0", - "verbose=0", - {"subst":"bmd_tmpfile_flag"}, - {"subst":"bmd_metrics_file_flag"}, - {"subst":"bmd_resetdupflag"} - ], - "postproc":{"op":"pack"} - } - }, {"id":"stats_filter__F0x900","required":"no","default":"0x900"}, - { - "id":"samtools_stats_F0x900", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "stats", - "-F", - {"subst":"stats_filter__F0x900"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, {"id":"stats_filter__F0xB00","required":"no","default":"0xB00"}, - { - "id":"samtools_stats_F0xB00", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "stats", - "-F", - {"subst":"stats_filter__F0xB00"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, - {"id":"seqchksum_cmd","required":"no","default":"bamseqchksum"}, {"id":"seqchksum_hash_type","required":"no","default":"sha512primesums512"}, { "id":"seqchksum_hash_flag", "required":"yes", "subst_constructor":{ - "vals":[ - "hash", - {"subst":"seqchksum_hash_type"} - ], + "vals":[ "hash", {"subst":"seqchksum_hash_type"} ], "postproc":{"op":"concat","pad":"="} } }, - { - "id":"seqchksum_extrahash_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bamseqchksum", - {"subst":"seqchksum_hash_flag"} - ], - "postproc":{"op":"pack","pad":" "} - }, - "comment":"it seems peculiar to fix the command here, but the flag is specific to it; hash type is selectable" - }, { "id":"br_md5file_flag", "required":"no", "subst_constructor":{ - "vals":[ - "md5filename=", - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".bam.md5" - ], + "vals":[ 
"md5filename=", {"subst":"outdatadir"}, "/", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".bam.md5" ], "postproc":{"op":"concat", "pad":""} } }, @@ -159,10 +70,7 @@ "id":"br_numthreads_flag", "required":"no", "subst_constructor":{ - "vals":[ - "numthreads=", - {"subst":"br_numthreads_val"} - ], + "vals":[ "numthreads=", {"subst":"br_numthreads_val"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -170,63 +78,11 @@ "id":"br_tmpfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"brtmp"}, - "_", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".tmp" - ], + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"brtmp"}, "_", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".tmp" ], "postproc":{"op":"concat", "pad":""} } }, - { - "id":"bamrecompress", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bamrecompress", - "verbose=0", - "index=0", - "md5=1", - {"subst":"br_md5file_flag"}, - {"subst":"br_numthreads_flag"}, - {"subst":"br_tmpfile_flag"} - ], - "postproc":{"op":"pack", "pad":" "} - } - }, {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, - { - "id":"samtools_flagstat_filter", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "view", - "-u", - "-F", - {"subst":"flagstats_filter_flag"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, - { - "id":"samtools_flagstat_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "flagstat", - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, {"id":"fopid","default":{"subst":"phix_or_target"}}, { "id":"bam_file", @@ -262,14 +118,7 @@ "id":"seqchksum_extrahash_file", "required":"yes", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"fopid"}, - ".", - {"subst":"seqchksum_hash_type"}, - ".seqchksum" - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".", {"subst":"seqchksum_hash_type"}, 
".seqchksum" ], "postproc":{"op":"concat", "pad":""} }, "comment":"default hash type is currently sha512primesums512" @@ -312,7 +161,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"bammarkduplicates"} + "cmd": [ {"subst":"bmd_cmd"}, "level=0", "verbose=0", {"subst":"bmd_tmpfile_flag"}, {"subst":"bmd_metrics_file_flag"}, {"subst":"bmd_resetdupflag"} ] }, { "id":"fo_in_multiway", @@ -320,21 +169,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900__", "__SAMTOOLS_STATS_F0xB00__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"scramble_cmd"} + "cmd": [ {"subst":"scramble_executable"}, "-I", "bam", "-O", "cram" ] }, { "id":"scramble_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -342,14 +191,14 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"md5sum" + "cmd":[ "md5sum" ] }, { "id":"postprocess_md5", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["tr", "-d", " \\-\n"], + "cmd":[ "tr", "-d", " \\-\n" ], 
"comment":"the double-backslash is required to get the correct character set to the tr command" }, { @@ -357,42 +206,42 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamseqchksum", "inputformat=cram"] + "cmd":[ "bamseqchksum", "inputformat=cram" ] }, { "id":"bamcheck", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcheck", "-F", "0x900"] + "cmd":[ "bamcheck", "-F", "0x900" ] }, { "id":"samtools_stats_F0x900", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0x900"} + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0x900"}, "-" ] }, { "id":"samtools_stats_F0xB00", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0xB00"} + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0xB00"}, "-" ] }, { "id":"seqchksum", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_cmd"} + "cmd":[ "bamseqchksum" ] }, { "id":"seqchksum_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -400,7 +249,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_extrahash_cmd"}, + "cmd":[ "bamseqchksum", {"subst":"seqchksum_hash_flag"} ], "comment":"default hash type is currently sha512primesums512" }, @@ -409,7 +258,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"bamrecompress"} + "cmd":[ "bamrecompress", "verbose=0", "index=0", "md5=1", {"subst":"br_md5file_flag"}, {"subst":"br_numthreads_flag"}, 
{"subst":"br_tmpfile_flag"} ] }, { "id":"bam_file", "type":"OUTFILE", "name":{"subst":"bam_file"} }, { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, @@ -425,7 +274,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_filter"}, + "cmd": [ {"subst":"samtools_executable"}, "view", "-u", "-F", {"subst":"flagstats_filter_flag"}, "-" ], "description":"Filter out secondary and supplementary alignment records" }, { @@ -433,14 +282,14 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_cmd"} + "cmd": [ {"subst":"samtools_executable"}, "flagstat", "-" ] }, { "id":"cmp_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":["cmp", "__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__"] + "cmd":[ "cmp", "__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__" ] } ], "edges":[ @@ -451,8 +300,8 @@ { "id":"scramble_tee_to_bscs", "from":"scramble_tee:__SEQCHKSUM_OUT__", "to":"cram_seqchksum" }, { "id":"md5_to_postprocess", "from":"scramble_md5", "to":"postprocess_md5" }, { "id":"bmdmw_to_bamcheck", "from":"fo_in_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, - { "id":"bmdmw_to_sts_F0x900", "from":"fo_in_multiway:__SAMTOOLS_STATS_F0x900__", "to":"samtools_stats_F0x900" }, - { "id":"bmdmw_to_sts_F0xB00", "from":"fo_in_multiway:__SAMTOOLS_STATS_F0xB00__", "to":"samtools_stats_F0xB00" }, + { "id":"bmdmw_to_sts_F0x900", "from":"fo_in_multiway:__SAMTOOLS_STATS_F0x900_OUT__", "to":"samtools_stats_F0x900" }, + { "id":"bmdmw_to_sts_F0xB00", "from":"fo_in_multiway:__SAMTOOLS_STATS_F0xB00_OUT__", "to":"samtools_stats_F0xB00" }, { "id":"bmdmw_to_seqchksum", "from":"fo_in_multiway:__SEQCHKSUM_OUT__", "to":"seqchksum" }, { "id":"bmdmw_to_seqchksum_extrahash", "from":"fo_in_multiway:__SEQCHKSUM_EXTRAHASH_OUT__", "to":"seqchksum_extrahash" }, { "id":"bmdmw_to_flagstat", "from":"fo_in_multiway:__FLAGSTAT_OUT__", "to":"flagstat_filter" }, diff --git a/data/vtlib/final_output_prep.json 
b/data/vtlib/final_output_prep.json index 9a9da84fc..9cd03a228 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -3,12 +3,8 @@ "description":"steps in the alignment pipeline to post-process bam files produced by the AlignmentFilter", "subgraph_io":{ "ports":{ - "inputs":{ - "_stdin_":"bamsort_coord" - }, - "outputs":{ - "_stdout_":"seqchksum_tee:__FINAL_OUT__" - } + "inputs":{ "_stdin_":"bamsort_coord" }, + "outputs":{ "_stdout_":"seqchksum_tee:__FINAL_OUT__" } } }, "subst_params":[ @@ -24,35 +20,10 @@ "id":"bs_tmpfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"bstmp"}, - "_", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".tmp" - ], + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"bstmp"}, "_", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".tmp" ], "postproc":{"op":"concat", "pad":""} } }, - { - "id":"bamsort_coord", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bamsort", - "SO=coordinate", - "level=0", - "verbose=0", - "fixmate=1", - "adddupmarksupport=1", - {"subst":"bs_tmpfile_flag"} - ], - "postproc":{"op":"pack", "pad":" "} - } - }, { "id":"scramble_reference_flag", "required":"no", @@ -60,35 +31,11 @@ "subst_constructor":{ "vals":[ "-r", {"subst":"scramble_reference_fasta"} ] } }, - { - "id":"scramble_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"scramble_executable"}, - "-I", - "bam", - "-O", - "cram", - {"subst":"scramble_reference_flag"} - ], - "postproc":{"op":"pack"} - } - }, { "id":"bmd_tmpfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"bmdtmp","required":"yes"}, - "_", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".tmp" - ], + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"bmdtmp","required":"yes"}, "_", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".tmp" ], "postproc":{"op":"concat", "pad":""} 
} }, @@ -96,14 +43,7 @@ "id":"bmd_metrics_file_flag", "required":"no", "subst_constructor":{ - "vals":[ - "M=", - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".markdups_metrics.txt" - ], + "vals":[ "M=", {"subst":"outdatadir"}, "/", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".markdups_metrics.txt" ], "postproc":{"op":"concat", "pad":""} } }, @@ -111,125 +51,37 @@ "id":"bmd_resetdupflag", "comment":"this option should only be used with bamstreamingmarkduplicates (not bammarkduplicates or bammarkduplicates2)", "subst_constructor":{ - "vals":[ - "resetdupflag", - {"subst":"bmd_resetdupflag_val"} - ], + "vals":[ "resetdupflag", {"subst":"bmd_resetdupflag_val"} ], "postproc":{"op":"concat", "pad":"="} } }, {"id":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, - { - "id":"bammarkduplicates", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"bmd_cmd"}, - "level=0", - "verbose=0", - {"subst":"bmd_tmpfile_flag"}, - {"subst":"bmd_metrics_file_flag"}, - {"subst":"bmd_resetdupflag"} - ], - "postproc":{"op":"pack"} - } - }, {"id":"stats_filter__F0x900","required":"no","default":"0x900"}, - { - "id":"samtools_stats_F0x900", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "stats", - "-F", - {"subst":"stats_filter__F0x900"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, {"id":"stats_filter__F0xB00","required":"no","default":"0xB00"}, - { - "id":"samtools_stats_F0xB00", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "stats", - "-F", - {"subst":"stats_filter__F0xB00"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, {"id":"calibration_pu_executable","required":"no","default":"calibration_pu"}, {"id":"calibration_pu_bad_tiles_count","required":"no","default":"2"}, { "id":"calibration_pu_prefix", "required":"yes", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - 
{"subst":"phix_or_target"} - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, {"subst":"phix_or_target"} ], "postproc":{"op":"concat", "pad":""} } }, - {"id":"seqchksum_cmd","required":"no","default":"bamseqchksum"}, {"id":"seqchksum_hash_type","required":"no","default":"sha512primesums512"}, { "id":"seqchksum_hash_flag", "required":"yes", "subst_constructor":{ - "vals":[ - "hash", - {"subst":"seqchksum_hash_type"} - ], + "vals":[ "hash", {"subst":"seqchksum_hash_type"} ], "postproc":{"op":"concat","pad":"="} } }, - { - "id":"seqchksum_extrahash_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bamseqchksum", - {"subst":"seqchksum_hash_flag"} - ], - "postproc":{"op":"pack","pad":" "} - }, - "comment":"it seems peculiar to fix the command here, but the flag is specific to it; hash type is selectable" - }, - { - "id":"calibration_pu_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"calibration_pu_executable"}, - "-p", - {"subst":"calibration_pu_prefix"}, - "-filter-bad-tiles",{"subst":"calibration_pu_bad_tiles_count"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, { "id":"br_indexfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "indexfilename=", - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".bai" - ], + "vals":[ "indexfilename=", {"subst":"outdatadir"}, "/", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".bai" ], "postproc":{"op":"concat", "pad":""} } }, @@ -237,14 +89,7 @@ "id":"br_md5file_flag", "required":"no", "subst_constructor":{ - "vals":[ - "md5filename=", - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".bam.md5" - ], + "vals":[ "md5filename=", {"subst":"outdatadir"}, "/", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".bam.md5" ], "postproc":{"op":"concat", "pad":""} } }, @@ -252,10 +97,7 @@ "id":"br_numthreads_flag", "required":"no", "subst_constructor":{ - "vals":[ - "numthreads=", - 
{"subst":"br_numthreads_val"} - ], + "vals":[ "numthreads=", {"subst":"br_numthreads_val"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -263,64 +105,11 @@ "id":"br_tmpfile_flag", "required":"no", "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"brtmp"}, - "_", - {"subst":"rpt"}, - {"subst":"phix_or_target"}, - ".tmp" - ], + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"brtmp"}, "_", {"subst":"rpt"}, {"subst":"phix_or_target"}, ".tmp" ], "postproc":{"op":"concat", "pad":""} } }, - { - "id":"bamrecompress", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bamrecompress", - "verbose=0", - "index=1", - {"subst":"br_indexfile_flag"}, - "md5=1", - {"subst":"br_md5file_flag"}, - {"subst":"br_numthreads_flag"}, - {"subst":"br_tmpfile_flag"} - ], - "postproc":{"op":"pack", "pad":" "} - } - }, {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, - { - "id":"samtools_flagstat_filter", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "view", - "-u", - "-F", - {"subst":"flagstats_filter_flag"}, - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, - { - "id":"samtools_flagstat_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "flagstat", - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, {"id":"fopid","default":{"subst":"phix_or_target"}}, { "id":"bam_file", @@ -363,14 +152,7 @@ "id":"seqchksum_extrahash_file", "required":"yes", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"fopid"}, - ".", - {"subst":"seqchksum_hash_type"}, - ".seqchksum" - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".", {"subst":"seqchksum_hash_type"}, ".seqchksum" ], "postproc":{"op":"concat", "pad":""} }, "comment":"default hash type is currently sha512primesums512" @@ -412,7 +194,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"bamsort_coord"} + 
"cmd": [ "bamsort", "SO=coordinate", "level=0", "verbose=0", "fixmate=1", "adddupmarksupport=1", {"subst":"bs_tmpfile_flag"} ] }, { "id":"bammarkduplicates", @@ -420,7 +202,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"bammarkduplicates"} + "cmd": [ {"subst":"bmd_cmd"}, "level=0", "verbose=0", {"subst":"bmd_tmpfile_flag"}, {"subst":"bmd_metrics_file_flag"}, {"subst":"bmd_resetdupflag"} ] }, { "id":"bmd_multiway", @@ -428,21 +210,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900__", "__SAMTOOLS_STATS_F0xB00__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"scramble_cmd"} + "cmd": [ {"subst":"scramble_executable"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -457,7 +239,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["tr", "-d", " 
\\-\n"], + "cmd":[ "tr", "-d", " \\-\n" ], "comment":"the double-backslash is required to get the correct character set to the tr command" }, { @@ -465,49 +247,49 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["cram_index", "-", {"subst":"crai_file"}] + "cmd":[ "cram_index", "-", {"subst":"crai_file"} ] }, { "id":"cram_seqchksum", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamseqchksum", "inputformat=cram"] + "cmd":[ "bamseqchksum", "inputformat=cram" ] }, { "id":"bamcheck", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcheck", "-F", "0x900"] + "cmd":[ "bamcheck", "-F", "0x900" ] }, { "id":"samtools_stats_F0x900", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0x900"} + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0x900"}, "-" ] }, { "id":"samtools_stats_F0xB00", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0xB00"} + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0xB00"}, "-" ] }, { "id":"seqchksum", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_cmd"} + "cmd":[ "bamseqchksum" ] }, { "id":"seqchksum_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -515,7 +297,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_extrahash_cmd"}, + "cmd": [ "bamseqchksum", {"subst":"seqchksum_hash_flag"} ], "comment":"default hash type is currently sha512primesums512" }, @@ -524,14 +306,14 @@ 
"type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":{"subst":"calibration_pu_cmd"} + "cmd": [ {"subst":"calibration_pu_executable"}, "-p", {"subst":"calibration_pu_prefix"}, "-filter-bad-tiles", {"subst":"calibration_pu_bad_tiles_count"}, "-" ] }, { "id":"bamrecompress", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"bamrecompress"} + "cmd": [ "bamrecompress", "verbose=0", "index=1", {"subst":"br_indexfile_flag"}, "md5=1", {"subst":"br_md5file_flag"}, {"subst":"br_numthreads_flag"}, {"subst":"br_tmpfile_flag"} ] }, { "id":"bam_file", "type":"OUTFILE", "name":{"subst":"bam_file"} }, { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, @@ -547,7 +329,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_filter"}, + "cmd": [ {"subst":"samtools_executable"}, "view", "-u", "-F", {"subst":"flagstats_filter_flag"}, "-" ], "description":"Filter out secondary and supplementary alignment records" }, { @@ -555,14 +337,14 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_cmd"} + "cmd": [ {"subst":"samtools_executable"}, "flagstat", "-" ] }, { "id":"cmp_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":["cmp", "__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__"] + "cmd":[ "cmp", "__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__" ] } ], "edges":[ @@ -575,8 +357,8 @@ { "id":"scramble_tee_to_bscs", "from":"scramble_tee:__SEQCHKSUM_OUT__", "to":"cram_seqchksum" }, { "id":"md5_to_postprocess", "from":"scramble_md5", "to":"postprocess_md5" }, { "id":"bmdmw_to_bamcheck", "from":"bmd_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, - { "id":"bmdmw_to_sts_F0x900", "from":"bmd_multiway:__SAMTOOLS_STATS_F0x900__", "to":"samtools_stats_F0x900" }, - { "id":"bmdmw_to_sts_F0xB00", "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00__", "to":"samtools_stats_F0xB00" }, + { "id":"bmdmw_to_sts_F0x900", 
"from":"bmd_multiway:__SAMTOOLS_STATS_F0x900_OUT__", "to":"samtools_stats_F0x900" }, + { "id":"bmdmw_to_sts_F0xB00", "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00_OUT__", "to":"samtools_stats_F0xB00" }, { "id":"bmdmw_to_calibration_pu", "from":"bmd_multiway:__CALIBRATION_PU_OUT__", "to":"calibration_pu" }, { "id":"bmdmw_to_seqchksum", "from":"bmd_multiway:__SEQCHKSUM_OUT__", "to":"seqchksum" }, { "id":"bmdmw_to_seqchksum_extrahash", "from":"bmd_multiway:__SEQCHKSUM_EXTRAHASH_OUT__", "to":"seqchksum_extrahash" }, diff --git a/data/vtlib/merge_aligned.json b/data/vtlib/merge_aligned.json index ee60e4937..66580c6c3 100644 --- a/data/vtlib/merge_aligned.json +++ b/data/vtlib/merge_aligned.json @@ -1,7 +1,7 @@ { - "description": "Read ALIGNED data from multiple library CRAM files producing merged output in CRAM format: full PG history, complete SQ lines, adapter marking, etc. The seqchksum file merged from the originals is compared with the seqchksum produced post merge.The input cram is already expected to have been sorted by coordinates with the adddupmarksupport flag specified", - "version":"0.5", - "subst_params": [ +"description": "Read ALIGNED data from multiple library CRAM files producing merged output in CRAM format: full PG history, complete SQ lines, adapter marking, etc. 
The seqchksum file merged from the originals is compared with the seqchksum produced post merge.The input cram is already expected to have been sorted by coordinates with the adddupmarksupport flag specified", +"version":"0.5", +"subst_params": [ { "id":"basic_pipeline_params_file", "required":"yes", @@ -10,193 +10,129 @@ "postproc":{"op":"concat", "pad":""} } }, - { - "id": "basic_pipeline_params", - "type":"SPFILE", - "name":{"subst":"basic_pipeline_params_file"}, - "required": "no", - "comment":"this will expand to a set of subst_param elements" - }, - { - "id":"incrams", - "required":"yes", - "default":"", - "comment":"An iRODS path can be specified here" - }, - { - "id":"crammerge", - "required":"yes", - "subst_constructor":{ - "vals":[ - "bammerge", - "SO=coordinate", - "inputformat=cram", - "outputformat=bam", - {"subst":"incrams"} - ], - "postproc":{"op":"pack", "pad":" "} - } - }, - { - "id":"bmd_tmpfile_flag", - "required":"no", - "subst_constructor":{ - "vals":[ - "tmpfile=", - {"subst":"outdatadir"}, - "/", - {"subst":"bmdtmp","required":"yes"}, - "_", - {"subst":"library"}, - ".tmp" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"bmd_metrics_file_flag", - "required":"no", - "subst_constructor":{ - "vals":[ - "M=", - {"subst":"outdatadir"}, - "/", - {"subst":"library"}, - ".markdups_metrics.txt" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"bmd_resetdupflag", - "comment":"this option should only be used with bamstreamingmarkduplicates (not bammarkduplicates or bammarkduplicates2)", - "subst_constructor":{ - "vals":[ - "resetdupflag", - {"subst":"bmd_resetdupflag_val"} - ], - "postproc":{"op":"concat", "pad":"="} - } - }, - {"id":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, - { - "id":"bammarkduplicates", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"bmd_cmd"}, - "level=0", - "verbose=0", - {"subst":"bmd_tmpfile_flag"}, - {"subst":"bmd_metrics_file_flag"}, - 
{"subst":"bmd_resetdupflag"} - ], - "postproc":{"op":"pack"} - } - }, - { - "id":"incrams_seqchksum", - "required":"yes", - "default":"", - "comment":"An iRODS path should not be used here" - }, - { - "id":"merge_seqchksum", - "required":"yes", - "subst_constructor":{ - "vals":[ - "./seqchksum_merge.pl", - {"subst":"incrams_seqchksum"} - ], - "postproc":{"op":"pack", "pad":" "} - } - }, - {"id":"final_output_prep_name","required":"no","default":"merge_final_output_prep"}, - { - "id":"merge_final_output_prep", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"cfgdatadir"}, - "/", - {"subst":"final_output_prep_name"}, - ".json" - ], - "postproc":{"op":"concat", "pad":""} - } - } - ], - "nodes": [ - { - "id":"bammarkduplicates", - "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":{"subst":"bammarkduplicates"} - }, - { - "id":"crammerge", - "type":"EXEC", - "use_STDIN": false, - "use_STDOUT": true, - "cmd":{"subst":"crammerge"}, - "description":"merge individual cram files from a sample into one cram file" - }, - { - "id": "merge_final_output_prep", - "type": "VTFILE", - "subst_map": - { - "bstmp":"bspaft", - "brtmp":"brpaft", - "bmdtmp":"bmdpaft" - }, - "name": {"subst":"merge_final_output_prep"}, - "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (seqchksum_file)", - "description": "subgraph containing post alignment_filter process (target)" - }, - { - "id":"merge_seqchksum", - "type":"EXEC", - "use_STDIN": false, - "use_STDOUT": true, - "cmd":{"subst":"merge_seqchksum"}, - "description": "merge individual cram seqchksum (crc32prod) files" - }, - { - "id":"seqchksum_head5", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "type":"EXEC", - "cmd":["head", "-5"], - "description":"Only use top 5 lines of seqchksum file for the comparison" - }, - { - "id":"seqchksumdefault_head5", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": 
true, - "type":"EXEC", - "cmd":["head", "-5"], - "description":"Only use top 5 lines of seqchksum file for the comparison." - }, - { - "id":"cmp_seqchksumdefault", - "use_STDIN": true, - "use_STDOUT": true, - "type":"EXEC", - "cmd":["cmp", "-s", "__MERGED_SEQCHKSUM_IN__"], - "description":"check input primary/sequence data matches output" - } - ], + { + "id": "basic_pipeline_params", + "type":"SPFILE", + "name":{"subst":"basic_pipeline_params_file"}, + "required": "no", + "comment":"this will expand to a set of subst_param elements" + }, + { + "id":"incrams", + "required":"yes", + "default":"", + "comment":"An iRODS path can be specified here" + }, + { + "id":"bmd_tmpfile_flag", + "required":"no", + "subst_constructor":{ + "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"bmdtmp","required":"yes"}, "_", {"subst":"library"}, ".tmp" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"bmd_metrics_file_flag", + "required":"no", + "subst_constructor":{ + "vals":[ "M=", {"subst":"outdatadir"}, "/", {"subst":"library"}, ".markdups_metrics.txt" ], + "postproc":{"op":"concat", "pad":""} + } + }, + { + "id":"bmd_resetdupflag", + "comment":"this option should only be used with bamstreamingmarkduplicates (not bammarkduplicates or bammarkduplicates2)", + "subst_constructor":{ + "vals":[ "resetdupflag", {"subst":"bmd_resetdupflag_val"} ], + "postproc":{"op":"concat", "pad":"="} + } + }, + {"id":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, + { + "id":"incrams_seqchksum", + "required":"yes", + "default":"", + "comment":"An iRODS path should not be used here" + }, + {"id":"final_output_prep_name","required":"no","default":"merge_final_output_prep"}, + { + "id":"merge_final_output_prep", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"final_output_prep_name"}, ".json" ], + "postproc":{"op":"concat", "pad":""} + } + } +], +"nodes": [ + { + "id":"bammarkduplicates", + "comment":"default tool 
bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "orig_cmd":{"subst":"bammarkduplicates"}, + "cmd": [ {"subst":"bmd_cmd"}, "level=0", "verbose=0", {"subst":"bmd_tmpfile_flag"}, {"subst":"bmd_metrics_file_flag"}, {"subst":"bmd_resetdupflag"} ] + }, + { + "id":"crammerge", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "orig_cmd":{"subst":"crammerge"}, + "cmd": [ "bammerge", "SO=coordinate", "inputformat=cram", "outputformat=bam", {"subst":"incrams"} ], + "description":"merge individual cram files from a sample into one cram file" + }, + { + "id": "merge_final_output_prep", + "type": "VTFILE", + "subst_map": { "bstmp":"bspaft", "brtmp":"brpaft", "bmdtmp":"bmdpaft" }, + "name": {"subst":"merge_final_output_prep"}, + "comment":"inputs: _stdin_ (bam); outputs: _stdout_ (seqchksum_file)", + "description": "subgraph containing post alignment_filter process (target)" + }, + { + "id":"merge_seqchksum", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "orig_cmd":{"subst":"merge_seqchksum"}, + "cmd":[ "seqchksum_merge.pl", {"subst":"incrams_seqchksum"} ], + "description": "merge individual cram seqchksum (crc32prod) files" + }, + { + "id":"seqchksum_head5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "type":"EXEC", + "cmd":[ "head", "-5" ], + "description":"Only use top 5 lines of seqchksum file for the comparison" + }, + { + "id":"seqchksumdefault_head5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "type":"EXEC", + "cmd":[ "head", "-5" ], + "description":"Only use top 5 lines of seqchksum file for the comparison." 
+ }, + { + "id":"cmp_seqchksumdefault", + "use_STDIN": true, + "use_STDOUT": true, + "type":"EXEC", + "cmd":[ "cmp", "-s", "__MERGED_SEQCHKSUM_IN__" ], + "description":"check input primary/sequence data matches output" + } +], "edges": [ - { "id": "crammerge_to_bammarkduplicates", "from": "crammerge", "to": "bammarkduplicates" }, - { "id": "bammarkduplicates_to_final_output_prep", "from": "bammarkduplicates", "to": "merge_final_output_prep" }, - { "id": "merge_seqchksum_to_seqchksumdefault_tee", "from": "merge_seqchksum", "to": "seqchksumdefault_head5" }, - { "id": "seqchksumdefault_head5", "from":"seqchksumdefault_head5", "to":"cmp_seqchksumdefault:__MERGED_SEQCHKSUM_IN__" }, - { "id": "final_output_prep_to_head5", "from": "merge_final_output_prep","to": "seqchksum_head5" }, - { "id": "seqchksum_head5_to_cmp_seqchksumdefault", "from": "seqchksum_head5","to": "cmp_seqchksumdefault" } - ] + { "id": "crammerge_to_bammarkduplicates", "from": "crammerge", "to": "bammarkduplicates" }, + { "id": "bammarkduplicates_to_final_output_prep", "from": "bammarkduplicates", "to": "merge_final_output_prep" }, + { "id": "merge_seqchksum_to_seqchksumdefault_tee", "from": "merge_seqchksum", "to": "seqchksumdefault_head5" }, + { "id": "seqchksumdefault_head5", "from":"seqchksumdefault_head5", "to":"cmp_seqchksumdefault:__MERGED_SEQCHKSUM_IN__" }, + { "id": "final_output_prep_to_head5", "from": "merge_final_output_prep","to": "seqchksum_head5" }, + { "id": "seqchksum_head5_to_cmp_seqchksumdefault", "from": "seqchksum_head5","to": "cmp_seqchksumdefault" } +] } diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 272a2383e..e7046704c 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -207,7 +207,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", 
"300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900__", "__SAMTOOLS_STATS_F0xB00__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", @@ -221,7 +221,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -236,7 +236,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["tr", "-d", " \\-\n"], + "cmd":[ "tr", "-d", " \\-\n" ], "comment":"the double-backslash is required to get the correct character set to the tr command" }, { @@ -244,49 +244,52 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["cram_index", "-", {"subst":"crai_file"}] + "cmd":[ "cram_index", "-", {"subst":"crai_file"} ] }, { "id":"cram_seqchksum", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamseqchksum", "inputformat=cram"] + "cmd":[ "bamseqchksum", "inputformat=cram" ] }, { "id":"bamcheck", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcheck", "-F", "0x900"] + "cmd":[ "bamcheck", "-F", "0x900" ] }, { "id":"samtools_stats_F0x900", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0x900"} + "orig_cmd":{"subst":"samtools_stats_F0x900"}, + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0x900"}, "-" ] }, { 
"id":"samtools_stats_F0xB00", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_stats_F0xB00"} + "orig_cmd":{"subst":"samtools_stats_F0xB00"}, + "cmd": [ {"subst":"samtools_executable"}, "stats", "-F", {"subst":"stats_filter__F0xB00"}, "-" ] }, { "id":"seqchksum", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_cmd"} + "orig_cmd":{"subst":"seqchksum_cmd"}, + "cmd":[ "bamseqchksum" ] }, { "id":"seqchksum_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -294,7 +297,8 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"seqchksum_extrahash_cmd"}, + "orig_cmd":{"subst":"seqchksum_extrahash_cmd"}, + "cmd": [ "bamseqchksum", {"subst":"seqchksum_hash_flag"} ], "comment":"default hash type is currently sha512primesums512" }, { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, @@ -310,7 +314,8 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_filter"}, + "orig_cmd":{"subst":"samtools_flagstat_filter"}, + "cmd": [ {"subst":"samtools_executable"}, "view", "-u", "-F", {"subst":"flagstats_filter_flag"}, "-" ], "description":"Filter out secondary and supplementary alignment records" }, { @@ -318,14 +323,15 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_flagstat_cmd"} + "orig_cmd":{"subst":"samtools_flagstat_cmd"}, + "cmd": [ {"subst":"samtools_executable"}, "flagstat", "-" ] }, { "id":"cmp_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":["cmp", 
"__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__"] + "cmd":[ "cmp", "__BAM_SEQCHKSUM_IN__", "__CRAM_SEQCHKSUM_IN__" ] } ], "edges":[ @@ -336,8 +342,8 @@ { "id":"scramble_tee_to_bscs", "from":"scramble_tee:__SEQCHKSUM_OUT__", "to":"cram_seqchksum" }, { "id":"md5_to_postprocess", "from":"scramble_md5", "to":"postprocess_md5" }, { "id":"bmdmw_to_bamcheck", "from":"bmd_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, - { "id":"bmdmw_to_sts_F0x900", "from":"bmd_multiway:__SAMTOOLS_STATS_F0x900__", "to":"samtools_stats_F0x900" }, - { "id":"bmdmw_to_sts_F0xB00", "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00__", "to":"samtools_stats_F0xB00" }, + { "id":"bmdmw_to_sts_F0x900", "from":"bmd_multiway:__SAMTOOLS_STATS_F0x900_OUT__", "to":"samtools_stats_F0x900" }, + { "id":"bmdmw_to_sts_F0xB00", "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00_OUT__", "to":"samtools_stats_F0xB00" }, { "id":"bmdmw_to_seqchksum", "from":"bmd_multiway:__SEQCHKSUM_OUT__", "to":"seqchksum" }, { "id":"bmdmw_to_seqchksum_extrahash", "from":"bmd_multiway:__SEQCHKSUM_EXTRAHASH_OUT__", "to":"seqchksum_extrahash" }, { "id":"bmdmw_to_flagstat", "from":"bmd_multiway:__FLAGSTAT_OUT__", "to":"flagstat_filter" }, diff --git a/data/vtlib/post_alignment.json b/data/vtlib/post_alignment.json index fa24d2a6d..1789d542e 100644 --- a/data/vtlib/post_alignment.json +++ b/data/vtlib/post_alignment.json @@ -6,7 +6,7 @@ "inputs":{ "_stdin_":"tee_headerSQfix", "reference_dict":"alterSQ_headerSQfix:__IN_PICARD_DICT__", - "no_align_bam":"bam12auxmerge:__NO_ALN_BAM__" + "no_align_bam":"bam12auxmerge:__NO_ALN_BAM_IN__" }, "outputs":{ "_stdout_":"bam12auxmerge" @@ -20,31 +20,7 @@ "name":{"subst":"basic_pipeline_params_file"}, "required": "no", "comment":"this will expand to a set of subst_param elements" - }, - { - "id":"samtools_view", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "view", - "-h", - "-" - ], - "postproc":{"op":"pack","pad":" "} - } - }, - { - "id":"samtools_reheader", - 
"required":"yes", - "subst_constructor":{ - "vals":[ - {"subst":"samtools_executable"}, - "reheader" - ], - "postproc":{"op":"pack","pad":" "} - } - } + } ], "nodes":[ { @@ -60,7 +36,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"samtools_view"} + "cmd":[ {"subst":"samtools_executable"}, "view", "-h", "-" ] }, { "id":"alterSQ_headerSQfix", @@ -76,14 +52,14 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["teepot", "-v", "-m", "5M", "-"] + "cmd":[ "teepot", "-v", "-m", "5M", "-" ] }, { "id":"reheader_headerSQfix", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[{"subst":"samtools_reheader"}, "__IN_SAMHEADER__", "__IN_BAM__"] + "cmd":[ {"subst":"samtools_executable"}, "reheader", "__IN_SAMHEADER__", "__IN_BAM__" ] }, { "id":"bam12split", @@ -104,49 +80,17 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bam12auxmerge", "level=0", "rankstrip=1", "ranksplit=0", "zztoname=0", "clipreinsert=1", "__NO_ALN_BAM__"] + "cmd":["bam12auxmerge", "level=0", "rankstrip=1", "ranksplit=0", "zztoname=0", "clipreinsert=1", "__NO_ALN_BAM_IN__"] } ], "edges":[ - { - "id":"tee_headerSQfix_to_sam", - "from":"tee_headerSQfix:__HEADER_FIX_OUT__", - "to":"sam_headerSQfix" - }, - { - "id":"sam_headerSQfix_to_alterSQ", - "from":"sam_headerSQfix", - "to":"alterSQ_headerSQfix" - }, - { - "id":"alterSQ_headerSQfix_to_reheader", - "from":"alterSQ_headerSQfix", - "to":"reheader_headerSQfix:__IN_SAMHEADER__" - }, - { - "id":"tee_headerSQfix_to_mbuffer", - "from":"tee_headerSQfix:__FULL_BAM_OUT__", - "to":"mbuffer_headerSQfix" - }, - { - "id":"mbuffer_headerSQfix_to_reheader", - "from":"mbuffer_headerSQfix", - "to":"reheader_headerSQfix:__IN_BAM__" - }, - { - "id":"reheader_headerSQfix_to_bam12split", - "from":"reheader_headerSQfix", - "to":"bam12split" - }, - { - "id":"bam12split_to_bamsort_qname", - "from":"bam12split", - "to":"bamsort_qname" - }, - { - "id":"bsqn_to_bam12auxmerge", - "from":"bamsort_qname", - 
"to":"bam12auxmerge" - } + { "id":"tee_headerSQfix_to_sam", "from":"tee_headerSQfix:__HEADER_FIX_OUT__", "to":"sam_headerSQfix" }, + { "id":"sam_headerSQfix_to_alterSQ", "from":"sam_headerSQfix", "to":"alterSQ_headerSQfix" }, + { "id":"alterSQ_headerSQfix_to_reheader", "from":"alterSQ_headerSQfix", "to":"reheader_headerSQfix:__IN_SAMHEADER__" }, + { "id":"tee_headerSQfix_to_mbuffer", "from":"tee_headerSQfix:__FULL_BAM_OUT__", "to":"mbuffer_headerSQfix" }, + { "id":"mbuffer_headerSQfix_to_reheader", "from":"mbuffer_headerSQfix", "to":"reheader_headerSQfix:__IN_BAM__" }, + { "id":"reheader_headerSQfix_to_bam12split", "from":"reheader_headerSQfix", "to":"bam12split" }, + { "id":"bam12split_to_bamsort_qname", "from":"bam12split", "to":"bamsort_qname" }, + { "id":"bsqn_to_bam12auxmerge", "from":"bamsort_qname", "to":"bam12auxmerge" } ] } diff --git a/data/vtlib/pre_alignment.json b/data/vtlib/pre_alignment.json index 98da89e91..c17a8055e 100644 --- a/data/vtlib/pre_alignment.json +++ b/data/vtlib/pre_alignment.json @@ -17,7 +17,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamcollate2 collate=3 level=0", + "cmd":[ "bamcollate2", "collate=3", "level=0" ], "comment":"already collated suitably - just here to do the ranking in the name" }, { @@ -25,7 +25,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamreset resetaux=0 level=0 verbose=0", + "cmd":[ "bamreset", "resetaux=0", "level=0", "verbose=0" ], "comment":"Alignment removal also required for bamadapterclip (at least 0.0.142)" }, { @@ -33,20 +33,12 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamadapterclip verbose=0 level=0", + "cmd":[ "bamadapterclip", "verbose=0", "level=0" ], "description":"Hard clip adapter sequence from reads before feeding to Tophat2" } ], "edges":[ - { - "id":"bamcollate2_ranking_to_bamreset_prealn", - "from":"bamcollate2_ranking", - "to":"bamreset_pre_alignment" - }, - { - "id":"bamreset_prealn_to_bamadapterclip", - 
"from":"bamreset_pre_alignment", - "to":"bamadapterclip" - } + { "id":"bamcollate2_ranking_to_bamreset_prealn", "from":"bamcollate2_ranking", "to":"bamreset_pre_alignment" }, + { "id":"bamreset_prealn_to_bamadapterclip", "from":"bamreset_pre_alignment", "to":"bamadapterclip" } ] } diff --git a/data/vtlib/pre_alignment_realign.json b/data/vtlib/pre_alignment_realign.json index 6f9300485..14f6f1796 100644 --- a/data/vtlib/pre_alignment_realign.json +++ b/data/vtlib/pre_alignment_realign.json @@ -17,7 +17,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamcollate2 collate=3 level=0", + "cmd":[ "bamcollate2", "collate=3", "level=0" ], "comment":"already collated suitably - just here to do the ranking in the name" }, { @@ -25,7 +25,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamreset resetaux=1 level=0 verbose=0", + "cmd":[ "bamreset", "resetaux=1", "level=0", "verbose=0" ], "comment":"Alignment removal also required for bamadapterclip (at least 0.0.142)" }, { @@ -33,20 +33,12 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamadapterfind clip=1 verbose=0 level=0", + "cmd":[ "bamadapterfind", "clip=1", "verbose=0", "level=0" ], "description":"Find and hard clip adapter sequence from reads before feeding to aligner" } ], "edges":[ - { - "id":"bamcollate2_ranking_to_bamreset_prealn", - "from":"bamcollate2_ranking", - "to":"bamreset_pre_alignment" - }, - { - "id":"bamreset_prealn_to_bamadapterclip", - "from":"bamreset_pre_alignment", - "to":"bamadapterclip" - } + { "id":"bamcollate2_ranking_to_bamreset_prealn", "from":"bamcollate2_ranking", "to":"bamreset_pre_alignment" }, + { "id":"bamreset_prealn_to_bamadapterclip", "from":"bamreset_pre_alignment", "to":"bamadapterclip" } ] } diff --git a/data/vtlib/realignment_wtsi_template.json b/data/vtlib/realignment_wtsi_template.json index df92b3e9f..dd4cd4f81 100644 --- a/data/vtlib/realignment_wtsi_template.json +++ b/data/vtlib/realignment_wtsi_template.json @@ -186,7 
+186,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamcollate2", {"subst":"src_input_format_flag"}, "collate=1", "level=0"], + "cmd":[ "bamcollate2", {"subst":"src_input_format_flag"}, "collate=1", "level=0" ], "comment":"ensure BAM records are gathered by template i.e. queryname" }, { @@ -194,7 +194,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__"], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ], "comment":"specify parameter value teepot_tempdir_value to specif teepot tempdir" }, { @@ -237,7 +237,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":["bamrecompress", "verbose=0", "numthreads=2"] + "cmd":[ "bamrecompress", "verbose=0", "numthreads=2" ] }, { "id":"initial_phix_aln_bam", diff --git a/data/vtlib/seqchksum.json b/data/vtlib/seqchksum.json index e79df5041..fe8be1b82 100644 --- a/data/vtlib/seqchksum.json +++ b/data/vtlib/seqchksum.json @@ -23,19 +23,7 @@ { "id":"rehash_match_flds"}, { "id":"rehash_acc_flag", "subst_constructor":{ "vals":[ "-a", {"subst":"rehash_acc_flds"} ], "postproc":{"op":"concat","pad":""} } }, { "id":"rehash_chksum_flag", "subst_constructor":{ "vals":[ "-c", {"subst":"rehash_chksum_flds"} ], "postproc":{"op":"concat","pad":""} } }, - { "id":"rehash_match_flag", "subst_constructor":{ "vals":[ "-m", {"subst":"rehash_match_flds"} ], "postproc":{"op":"concat","pad":""} } }, - { - "id":"seqchksum_merge", - "subst_constructor":{ - "vals":[ - "seqchksum_merge.pl", - {"subst":"rehash_acc_flag"}, - {"subst":"rehash_chksum_flag"}, - {"subst":"rehash_match_flag"} - ], - "postproc":{"op":"pack"} - } - } + { "id":"rehash_match_flag", "subst_constructor":{ "vals":[ "-m", {"subst":"rehash_match_flds"} ], "postproc":{"op":"concat","pad":""} } } ], "nodes":[ { @@ -43,21 +31,21 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamseqchksum" + 
"cmd":[ "bamseqchksum" ] }, { "id":"merge_output_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[ {"subst":"seqchksum_merge"}, "__TARGET_CHKSUM_IN__", "__PHIX_CHKSUM_IN__" ] + "cmd": [ "seqchksum_merge.pl", {"subst":"rehash_acc_flag"}, {"subst":"rehash_chksum_flag"}, {"subst":"rehash_match_flag"}, "__TARGET_CHKSUM_IN__", "__PHIX_CHKSUM_IN__" ] }, { "id":"cmp_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":"cmp __INPUTCHK_IN__ __OUTPUTCHK_IN__", + "cmd":[ "cmp", "__INPUTCHK_IN__", "__OUTPUTCHK_IN__" ], "description":"check input primary/sequence data matches output" } ], diff --git a/data/vtlib/seqchksum_hs.json b/data/vtlib/seqchksum_hs.json index 5ab9298b2..cdf156349 100644 --- a/data/vtlib/seqchksum_hs.json +++ b/data/vtlib/seqchksum_hs.json @@ -44,14 +44,23 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"bamseqchksum" + "cmd":[ "bamseqchksum" ] }, { "id":"merge_output_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[ {"subst":"seqchksum_merge"}, "__TARGET_CHKSUM_IN__", "__HUMAN_SPLIT_CHKSUM_IN__", "__PHIX_CHKSUM_IN__" ] + "orig_cmd":[ {"subst":"seqchksum_merge"}, "__TARGET_CHKSUM_IN__", "__HUMAN_SPLIT_CHKSUM_IN__", "__PHIX_CHKSUM_IN__" ], + "cmd": [ + "seqchksum_merge.pl", + {"subst":"rehash_acc_flag"}, + {"subst":"rehash_chksum_flag"}, + {"subst":"rehash_match_flag"}, + "__TARGET_CHKSUM_IN__", + "__HUMAN_SPLIT_CHKSUM_IN__", + "__PHIX_CHKSUM_IN__" + ] }, { "id":"cmp_seqchksum", diff --git a/data/vtlib/seqchksum_realign.json b/data/vtlib/seqchksum_realign.json index 139e6e7a5..458304efb 100644 --- a/data/vtlib/seqchksum_realign.json +++ b/data/vtlib/seqchksum_realign.json @@ -5,7 +5,7 @@ "ports":{ "inputs":{ "_stdin_":"seqchksum_input", - "target_seqchksum":"cmp_seqchksum:__OUTPUTCHK__" + "target_seqchksum":"cmp_seqchksum:__OUTPUTCHK_IN__" } } }, @@ -15,22 +15,18 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - 
"cmd":["bamseqchksum",{"subst":"src_input_format_flag"}] + "cmd":[ "bamseqchksum",{"subst":"src_input_format_flag"} ] }, { "id":"cmp_seqchksum", "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":"cmp __INPUTCHK__ __OUTPUTCHK__", + "cmd":[ "cmp", "__INPUTCHK_IN__", "__OUTPUTCHK_IN__" ], "description":"check input primary/sequence data matches output" } ], "edges":[ - { - "id":"input_chksum_to_cmp", - "from":"seqchksum_input", - "to":"cmp_seqchksum:__INPUTCHK__" - } + { "id":"input_chksum_to_cmp", "from":"seqchksum_input", "to":"cmp_seqchksum:__INPUTCHK_IN__" } ] } diff --git a/data/vtlib/tophat2_alignment.json b/data/vtlib/tophat2_alignment.json index 300414b82..a3f668f4f 100644 --- a/data/vtlib/tophat2_alignment.json +++ b/data/vtlib/tophat2_alignment.json @@ -5,7 +5,7 @@ "ports":{ "inputs":{ "_stdin_":"bamtofastq", - "reference":"tophat2:__REFERENCE_GENOME__" + "reference":"tophat2:__REFERENCE_GENOME_IN__" }, "outputs":{ "_stdout_":"bamcat" @@ -25,11 +25,7 @@ "required":"no", "default":"intfile_1.fq.gz", "subst_constructor":{ - "vals":[ - "intfile_1_", - {"subst":"rpt"}, - ".fq.gz" - ], + "vals":[ "intfile_1_", {"subst":"rpt"}, ".fq.gz" ], "postproc":{"op":"concat", "pad":""} } }, @@ -37,11 +33,7 @@ "id":"fastq1", "required":"yes", "subst_constructor":{ - "vals":[ - {"subst":"tmpdir"}, - "/", - {"subst":"fastq1_name"} - ], + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"fastq1_name"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -50,11 +42,7 @@ "required":"no", "default":"intfile_2.fq.gz", "subst_constructor":{ - "vals":[ - "intfile_2_", - {"subst":"rpt"}, - ".fq.gz" - ], + "vals":[ "intfile_2_", {"subst":"rpt"}, ".fq.gz" ], "postproc":{"op":"concat", "pad":""} } }, @@ -62,11 +50,7 @@ "id":"fastq2", "required":"yes", "subst_constructor":{ - "vals":[ - {"subst":"tmpdir"}, - "/", - {"subst":"fastq2_name"} - ], + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"fastq2_name"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -75,11 +59,7 @@ "id":"tophat_out", 
"required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"} - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"} ], "postproc":{"op":"concat","pad":""} }, "default":"tophat_out" @@ -88,11 +68,7 @@ { "id":"transcriptome_val", "subst_constructor":{ - "vals":[ - {"subst":"reposdir"}, - "/transcriptomes/", - {"subst":"transcriptome_subpath"} - ], + "vals":[ {"subst":"reposdir"}, "/transcriptomes/", {"subst":"transcriptome_subpath"} ], "postproc":{"op":"concat","pad":""} } }, @@ -100,10 +76,7 @@ "id":"transcriptome_flag", "required":"no", "subst_constructor":{ - "vals":[ - "--transcriptome-index", - {"subst":"transcriptome_val"} - ], + "vals":[ "--transcriptome-index", {"subst":"transcriptome_val"} ], "postproc":{"op":"concat","pad":"="} } }, @@ -111,10 +84,7 @@ "id":"aligner_numthreads_flag", "required":"no", "subst_constructor":{ - "vals":[ - "--num-threads", - {"subst":"aligner_numthreads"} - ], + "vals":[ "--num-threads", {"subst":"aligner_numthreads"} ], "postproc":{"op":"concat","pad":"="} } }, @@ -122,43 +92,15 @@ "id":"library_type_flag", "required":"no", "subst_constructor":{ - "vals":[ - "--library-type", - {"subst":"library_type"} - ], + "vals":[ "--library-type", {"subst":"library_type"} ], "postproc":{"op":"concat","pad":"="} } }, - { - "id":"tophat2_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - "tophat2", - "--keep-fasta-order", - "--no-sort-bam", - "--output-dir", - {"subst":"tophat_out"}, - "--mate-inner-dist","100", - {"subst":"aligner_numthreads_flag"}, - {"subst":"library_type_flag"}, - "--no-coverage-search", - "--microexon-search", - {"subst":"transcriptome_flag"} - ], - "postproc":{"op":"pack","pad":" "} - } - }, { "id":"junctions_bed", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"}, - "/junctions.bed" - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"}, "/junctions.bed" ], "postproc":{"op":"concat","pad":""} }, 
"default":"tophat_out/junctions.bed" @@ -167,12 +109,7 @@ "id":"insertions_bed", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"}, - "/insertions.bed" - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"}, "/insertions.bed" ], "postproc":{"op":"concat","pad":""} }, "default":"tophat_out/insertions.bed" @@ -181,12 +118,7 @@ "id":"deletions_bed", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"}, - "/deletions.bed" - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"}, "/deletions.bed" ], "postproc":{"op":"concat","pad":""} }, "default":"tophat_out/deletions.bed" @@ -196,13 +128,7 @@ "id":"accepted_hits_bam", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"}, - "/", - {"subst":"accepted_hits"} - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"}, "/", {"subst":"accepted_hits"} ], "postproc":{"op":"concat","pad":""} }, "default":"tophat_out/accepted_hits.bam" @@ -212,13 +138,7 @@ "id":"unmapped_bam", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"tophat_dir"}, - "_", - {"subst":"rpt"}, - "/", - {"subst":"unmapped"} - ], + "vals":[ {"subst":"tophat_dir"}, "_", {"subst":"rpt"}, "/", {"subst":"unmapped"} ], "postproc":{"op":"concat","pad":""} }, "default":"tophat_out/unmapped.bam" @@ -227,12 +147,7 @@ "id":"cp_deletions_bed_target", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - ".deletions.bed" - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".deletions.bed" ], "postproc":{"op":"concat","pad":""} } }, @@ -240,12 +155,7 @@ "id":"cp_insertions_bed_target", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - ".insertions.bed" - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".insertions.bed" ], "postproc":{"op":"concat","pad":""} } }, @@ -253,12 +163,7 
@@ "id":"cp_junctions_bed_target", "required":"no", "subst_constructor":{ - "vals":[ - {"subst":"outdatadir"}, - "/", - {"subst":"rpt"}, - ".junctions.bed" - ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".junctions.bed" ], "postproc":{"op":"concat","pad":""} } } @@ -269,7 +174,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["bamtofastq", "gz=1", "F=__FQOUT1__", "F2=__FQOUT2__"] + "cmd":["bamtofastq", "gz=1", "F=__FQ1_OUT__", "F2=__FQ2_OUT__"] }, { "id":"fq1", @@ -286,7 +191,22 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":[{"subst":"tophat2_cmd"}, "__REFERENCE_GENOME__", "__FQIN1__", "__FQIN2__"] + "cmd": [ + "tophat2", + "--keep-fasta-order", + "--no-sort-bam", + "--output-dir", + {"subst":"tophat_out"}, + "--mate-inner-dist","100", + {"subst":"aligner_numthreads_flag"}, + {"subst":"library_type_flag"}, + "--no-coverage-search", + "--microexon-search", + {"subst":"transcriptome_flag"} , + "__REFERENCE_GENOME__", + "__FQ1_IN__", + "__FQ2_IN__" + ] }, { "id":"junctions_bed", @@ -321,112 +241,44 @@ { "id":"bamcat", "type":"EXEC", - "cmd":["bamcat", "I=__IN_BAM1__", "I=__IN_BAM2__", "level=0"] + "cmd":[ "bamcat", "I=__IN_BAM1__", "I=__IN_BAM2__", "level=0" ] }, { "id":"cp_deletions_bed", "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":[ - "cp", - "__SRC_DELETIONS_BED__", - {"subst":"cp_deletions_bed_target"} - ] + "cmd":[ "cp", "__SRC_DELETIONS_BED_IN__", {"subst":"cp_deletions_bed_target"} ] }, { "id":"cp_insertions_bed", "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":[ - "cp", - "__SRC_INSERTIONS_BED__", - {"subst":"cp_insertions_bed_target"} - ] + "cmd":[ "cp", "__SRC_INSERTIONS_BED_IN__", {"subst":"cp_insertions_bed_target"} ] }, { "id":"cp_junctions_bed", "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":[ - "cp", - "__SRC_JUNCTIONS_BED__", - {"subst":"cp_junctions_bed_target"} - ] + "cmd":[ "cp", "__SRC_JUNCTIONS_BED_IN__", {"subst":"cp_junctions_bed_target"} 
] } ], "edges":[ - { - "id":"bamtofastq_to_fq1", - "from":"bamtofastq:__FQOUT1__", - "to":"fq1" - }, - { - "id":"bamtofastq_to_fq2", - "from":"bamtofastq:__FQOUT2__", - "to":"fq2" - }, - { - "id":"fq1_to_tophat2", - "from":"fq1", - "to":"tophat2:__FQIN1__" - }, - { - "id":"fq2_to_tophat2", - "from":"fq2", - "to":"tophat2:__FQIN2__" - }, - { - "id":"tophat2_to_accepted_hits_bam", - "from":"tophat2", - "to":"accepted_hits_bam" - }, - { - "id":"tophat2_to_unmapped_bam", - "from":"tophat2", - "to":"unmapped_bam" - }, - { - "id":"tophat2_to_deletions_bed", - "from":"tophat2", - "to":"deletions_bed" - }, - { - "id":"cp_deletions_bed", - "from":"deletions_bed", - "to":"cp_deletions_bed:__SRC_DELETIONS_BED__" - }, - { - "id":"tophat2_to_insertions_bed", - "from":"tophat2", - "to":"insertions_bed" - }, - { - "id":"cp_insertions_bed", - "from":"insertions_bed", - "to":"cp_insertions_bed:__SRC_INSERTIONS_BED__" - }, - { - "id":"tophat2_to_junctions_bed", - "from":"tophat2", - "to":"junctions_bed" - }, - { - "id":"cp_junctions_bed", - "from":"junctions_bed", - "to":"cp_junctions_bed:__SRC_JUNCTIONS_BED__" - }, - { - "id":"accepted_hits_bam_to_bamcat", - "from":"accepted_hits_bam", - "to":"bamcat:__IN_BAM1__" - }, - { - "id":"unmapped_bam_to_bamcat", - "from":"unmapped_bam", - "to":"bamcat:__IN_BAM2__" - } + { "id":"bamtofastq_to_fq1", "from":"bamtofastq:__FQ1_OUT__", "to":"fq1" }, + { "id":"bamtofastq_to_fq2", "from":"bamtofastq:__FQ2_OUT__", "to":"fq2" }, + { "id":"fq1_to_tophat2", "from":"fq1", "to":"tophat2:__FQ1_IN__" }, + { "id":"fq2_to_tophat2", "from":"fq2", "to":"tophat2:__FQ2_IN__" }, + { "id":"tophat2_to_accepted_hits_bam", "from":"tophat2", "to":"accepted_hits_bam" }, + { "id":"tophat2_to_unmapped_bam", "from":"tophat2", "to":"unmapped_bam" }, + { "id":"tophat2_to_deletions_bed", "from":"tophat2", "to":"deletions_bed" }, + { "id":"cp_deletions_bed", "from":"deletions_bed", "to":"cp_deletions_bed:__SRC_DELETIONS_BED_IN__" }, + { "id":"tophat2_to_insertions_bed", 
"from":"tophat2", "to":"insertions_bed" }, + { "id":"cp_insertions_bed", "from":"insertions_bed", "to":"cp_insertions_bed:__SRC_INSERTIONS_BED_IN__" }, + { "id":"tophat2_to_junctions_bed", "from":"tophat2", "to":"junctions_bed" }, + { "id":"cp_junctions_bed", "from":"junctions_bed", "to":"cp_junctions_bed:__SRC_JUNCTIONS_BED_IN__" }, + { "id":"accepted_hits_bam_to_bamcat", "from":"accepted_hits_bam", "to":"bamcat:__IN_BAM1__" }, + { "id":"unmapped_bam_to_bamcat", "from":"unmapped_bam", "to":"bamcat:__IN_BAM2__" } ] } From 255947828df6866940bb0910d8421e9327df05c0 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 16 Sep 2015 09:37:29 +0100 Subject: [PATCH 07/40] data/vtlib/seqchksum_realign.json - change command from string to array data/vtlib/alignment_wtsi_stage2_template.json - remove orig_cmd attribute data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json - remove orig_cmd attribute --- ...ge2_humansplit_notargetalign_template.json | 1 - .../vtlib/alignment_wtsi_stage2_template.json | 1 - data/vtlib/seqchksum_realign.json | 2 +- t/data/10-vtfp-00.json | 19 ------------------- 4 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 t/data/10-vtfp-00.json diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json index ff7c6456e..082dbcaaa 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json @@ -261,7 +261,6 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "orig_cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS__"], "cmd": [ {"subst":"java_cmd"}, "-Xmx1000m", diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 
557195829..2fa3d0dab 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -251,7 +251,6 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "orig_cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_BAM_IN__", "IN=__TARGET_BAM_IN__", "OUT=__PHIX_BAM_OUT__", "OUT=/dev/stdout", "METRICS_FILE=__AF_METRICS_OUT__"], "cmd": [ {"subst":"java_cmd"}, "-Xmx1000m", diff --git a/data/vtlib/seqchksum_realign.json b/data/vtlib/seqchksum_realign.json index 458304efb..040f2b3c3 100644 --- a/data/vtlib/seqchksum_realign.json +++ b/data/vtlib/seqchksum_realign.json @@ -22,7 +22,7 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": false, - "cmd":[ "cmp __INPUTCHK_IN__ __OUTPUTCHK_IN__" ], + "cmd":[ "cmp", "__INPUTCHK_IN__", "__OUTPUTCHK_IN__" ], "description":"check input primary/sequence data matches output" } ], diff --git a/t/data/10-vtfp-00.json b/t/data/10-vtfp-00.json deleted file mode 100644 index 0b2e40649..000000000 --- a/t/data/10-vtfp-00.json +++ /dev/null @@ -1,19 +0,0 @@ -{ -"description":"Test infinite recursion prevention in defaults for parameter resolution. 
Also provides an example of a parameter taking its default value from another parameter", -"version":"1.0", -"subst_params":[ - { "id": "p1", "default": "one" }, - { "id": "p2", "default": {"subst":"p3"} }, - { "id": "p3", "default": {"subst":"p4"} }, - { "id": "p4", "default": {"subst":"p5"} }, - { "id": "p5", "default": {"subst":"p2"} } -], -"nodes":[ - { - "id":"n1", - "type":"EXEC", - "cmd":[ "echo", {"subst":"p1"}, {"subst":"p2"} ] - } -] -} - From c772ebf0484fca8f334a8d210c6c39e76b16ea0c Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 16 Sep 2015 11:06:35 +0100 Subject: [PATCH 08/40] add missing test data --- t/data/10-vtfp-pv.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 t/data/10-vtfp-pv.json diff --git a/t/data/10-vtfp-pv.json b/t/data/10-vtfp-pv.json new file mode 100644 index 000000000..3dcee60ed --- /dev/null +++ b/t/data/10-vtfp-pv.json @@ -0,0 +1,18 @@ +{ +"description":"test export and import of parameter values using the -[export_]param_vals flags", +"version":"1.0", +"subst_params":[ + { "id": "subject", "default":"funeral"}, + { "id": "verb", "default":"ends"}, + { "id": "adj", "default":"mournful"}, + { "id": "prepobj", "subst_constructor":{ "vals":[ "fireworks", "display" ] } } +], +"nodes":[ + { + "id":"n1", + "type":"EXEC", + "cmd":[ "echo", "The", {"subst":"subject"}, {"subst":"verb"}, "with", "a", {"subst":"adj"}, {"subst":"prepobj"} ] + } +] +} + From bcbe5eff70f37bc55a6dbbe4f2ccebba69a5efeb Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 17 Sep 2015 16:49:58 +0100 Subject: [PATCH 09/40] new manifest --- MANIFEST | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/MANIFEST b/MANIFEST index c6d494f99..47711cac9 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2,6 +2,34 @@ bin/viv.pl bin/vtfp.pl Build.PL Changes +data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf +data/bwa_post_proc.vtf +data/hiseqx.vtf +data/post_alignment.vtf 
+data/vtlib/alignment_common.json +data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json +data/vtlib/alignment_wtsi_stage2_humansplit_template.json +data/vtlib/alignment_wtsi_stage2_template.json +data/vtlib/auxmerge_prep.json +data/vtlib/auxmerge_prep_realign.json +data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +data/vtlib/bwa_aln_alignment.json +data/vtlib/bwa_aln_se_alignment.json +data/vtlib/bwa_mem_alignment.json +data/vtlib/final_output_noalign_prep.json +data/vtlib/final_output_prep.json +data/vtlib/merge_aligned.json +data/vtlib/merge_final_output_prep.json +data/vtlib/post_alignment.json +data/vtlib/pre_alignment.json +data/vtlib/pre_alignment_realign.json +data/vtlib/README.vtlib +data/vtlib/realignment_wtsi_template.json +data/vtlib/seqchksum.json +data/vtlib/seqchksum_hs.json +data/vtlib/seqchksum_realign.json +data/vtlib/split_by_chromosome.json +data/vtlib/tophat2_alignment.json examples/bwa_aln_cfg.png examples/bwa_mem/bwa_mem_alignment.vtf examples/bwa_mem/generic_alignment_with_phix.vtf @@ -18,12 +46,22 @@ examples/tophat2_cfg.png MANIFEST This list of files README README.vtfp -t/data/simple_failing_pipeline.vtf -t/data/simple_pipeline.vtf t/00-scripts_compile.t +t/10-vtfp-array_expansion.t +t/10-vtfp-param_ring.t +t/10-vtfp-pv.t +t/10-vtfp-subst_directive.t +t/data/10-vtfp-array_expansion.json +t/data/10-vtfp-param_ring.json +t/data/10-vtfp-pv.json visualisation/bwa_aln.cfg visualisation/bwa_aln.json +visualisation/cgi-bin/getProgress visualisation/config_view.html +visualisation/edit.html +visualisation/p4_edit.css +visualisation/p4_edit.js visualisation/README.visualisation visualisation/tophat2.cfg visualisation/tophat2.json +visualisation/view.js From da781cb1d090f36a70251f511f1525c0e3ed1fae Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Sat, 19 Sep 2015 13:51:29 +0100 Subject: [PATCH 10/40] corrected reference port names (missed/inconsistent updates to _IN__ naming convention) --- 
data/vtlib/bwa_aln_alignment.json | 2 +- data/vtlib/tophat2_alignment.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index ed482179c..e6a129532 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -54,7 +54,7 @@ { "id":"bwa_sampe", "type":"EXEC", - "cmd":[ {"subst":"bwa_executable"}, "sampe", "__REFERENCE_GENOME_FASTA__", "__SAI_1_IN__", "__SAI_2_IN__", "__BAM_1_IN__", "__BAM_2_IN__" ] + "cmd":[ {"subst":"bwa_executable"}, "sampe", "__REFERENCE_GENOME_FASTA_IN__", "__SAI_1_IN__", "__SAI_2_IN__", "__BAM_1_IN__", "__BAM_2_IN__" ] }, { "id":"samtobam", diff --git a/data/vtlib/tophat2_alignment.json b/data/vtlib/tophat2_alignment.json index a3f668f4f..ba4d2ce58 100644 --- a/data/vtlib/tophat2_alignment.json +++ b/data/vtlib/tophat2_alignment.json @@ -203,7 +203,7 @@ "--no-coverage-search", "--microexon-search", {"subst":"transcriptome_flag"} , - "__REFERENCE_GENOME__", + "__REFERENCE_GENOME_IN__", "__FQ1_IN__", "__FQ2_IN__" ] From 9864a9e86bb1980265af5462ebd7bc8a8e4a0fa4 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 23 Sep 2015 15:27:46 +0100 Subject: [PATCH 11/40] update Changes --- Changes | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Changes b/Changes index a3edd97ea..749f5e851 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,22 @@ CHANGES LOG ----------- + + - port naming conventions (IN/OUT pre- and postfixes) adopted in templates and enforced in viv.pl + - vtfp.pl + improved error reporting + refactoring to ensure more consistent/intuitive evaluation of parameter values + allow specification of "local" parameter substitution (within a specific vtnode) + added --param_vals --export_param_vals flags + subst directive attributes ifnull and required added + remove dead code, review relevance of comments, general tidying + more tests + +release 0.17 + - human split with no target align (secondary stage) 
template introduced + - initial work for stage one (bcl, adapter, phiX and spatial filter to split processing) using templates - bammarkduplicates reintroduced for unaligned file because downstream qc processing relies on presence of markdups_metrics file - library cram merging: merge_aligned.json and merge_final_output_prep.json + - remove potential deadlock by using non-blocking open of STDIN release 0.16.4 - add extra branch to teepot command in to stream seqchksum output downstream (instead of using a file as an internal node) From cf7bca6975bec45b467d8e6ce0e5c85ba96250d6 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 24 Sep 2015 16:50:01 +0100 Subject: [PATCH 12/40] remove commented code --- bin/viv.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/viv.pl b/bin/viv.pl index c8f0fa73a..aa374b015 100755 --- a/bin/viv.pl +++ b/bin/viv.pl @@ -240,7 +240,6 @@ sub _update_node_data_xfer { #ensure port is connected to in manner suggested by naming convention croak 'Node '.($node->{'id'})." port $port connected as ".($edge_side == $FROM?q("from"):q("to")) if (($inout eq q(OUT))^($edge_side == $FROM)); } else { -# $logger->($VLMED, 'Node '.($node->{'id'})." has poorly described port $port (no _{IN,OUT}__ {suf,pre}fix)\n"); croak 'Node '.($node->{'id'})." 
has poorly described port $port (no _{IN,OUT}__ {suf,pre}fix)\n"; } my $cmd = $node->{'cmd'}; From 9dc1fbfe1e80e565c845cbdb1467406a15494b1a Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 25 Sep 2015 09:19:54 +0100 Subject: [PATCH 13/40] added Hash::Merge to required modules in Build.PL --- Build.PL | 1 + 1 file changed, 1 insertion(+) diff --git a/Build.PL b/Build.PL index 98f486de3..022f437c6 100644 --- a/Build.PL +++ b/Build.PL @@ -73,6 +73,7 @@ my $builder = $class->new( 'File::Which' => 0, 'Getopt::Long' => 0, 'Getopt::Std' => 0, + 'Hash::Merge' => 0, 'JSON' => 0, 'List::MoreUtils' => 0, 'POSIX' => 0, From be1b36e0e5fcdf0ff855a15e145f440d806338d2 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 25 Sep 2015 11:50:42 +0100 Subject: [PATCH 14/40] removed commented code --- bin/vtfp.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/vtfp.pl b/bin/vtfp.pl index 53d436401..44c9f28dc 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -1037,8 +1037,6 @@ sub combine_pvs { push @all_pvs, $clpv; # add parameter value structure created from command-line } -# Hash::Merge::set_behavior( 'RIGHT_PRECEDENT' ); -# Hash::Merge::set_behavior( 'LEFT_PRECEDENT' ); # merge user-supplied params files with slightly modified RIGHT_PRECEDENT behaviour Hash::Merge::specify_behavior( { From d130a3f12481cfbfa5abc41a97b68a336c020567 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 13:55:39 +0100 Subject: [PATCH 15/40] Restore deleted test data for viv.pl --- t/data/simple_carping_pipeline1.vtf | 40 ++++++++++++++++++++++++++ t/data/simple_failing_io_pipeline1.vtf | 40 ++++++++++++++++++++++++++ t/data/simple_failing_io_pipeline2.vtf | 40 ++++++++++++++++++++++++++ t/data/simple_failing_pipeline.vtf | 33 +++++++++++++++++++++ t/data/simple_pipeline.vtf | 40 ++++++++++++++++++++++++++ 5 files changed, 193 insertions(+) create mode 100644 t/data/simple_carping_pipeline1.vtf create mode 100644 t/data/simple_failing_io_pipeline1.vtf create mode 100644 
t/data/simple_failing_io_pipeline2.vtf create mode 100644 t/data/simple_failing_pipeline.vtf create mode 100644 t/data/simple_pipeline.vtf diff --git a/t/data/simple_carping_pipeline1.vtf b/t/data/simple_carping_pipeline1.vtf new file mode 100644 index 000000000..28915b833 --- /dev/null +++ b/t/data/simple_carping_pipeline1.vtf @@ -0,0 +1,40 @@ +{ + "description": "minimal carping test pipeline - works, but I/O port names are poorly named. See log for warning", + "nodes":[ + { "id": "n1", + "type": "EXEC", + "cmd": "echo stuff", + "use_STDIN": 0, + "use_STDOUT": 1 + }, + { "id": "n2", + "type": "EXEC", + "cmd": ["cat"], + "use_STDIN": 1, + "use_STDOUT": 1 + }, + { "id": "m", + "type": "EXEC", + "cmd": ["echo", "stuff"], + "use_STDIN": false, + "use_STDOUT": true + }, + { "id": "d", + "type": "EXEC", + "cmd": "diff __IN1__ __IN2__", + "use_STDIN": 0, + "use_STDOUT": 0 + } + ], + "edges":[ + { "from": "n1", + "to": "n2" + }, + { "from": "n2", + "to": "d:__IN1__" + }, + { "from": "m", + "to": "d:__IN2__" + } + ] +} diff --git a/t/data/simple_failing_io_pipeline1.vtf b/t/data/simple_failing_io_pipeline1.vtf new file mode 100644 index 000000000..1b946d304 --- /dev/null +++ b/t/data/simple_failing_io_pipeline1.vtf @@ -0,0 +1,40 @@ +{ + "description": "minimal failing test pipeline. Port name in edge does not match one in node definition. 
Also carps about poorly named __IN1__ and __INN2__ ports", + "nodes":[ + { "id": "n1", + "type": "EXEC", + "cmd": "echo stuff", + "use_STDIN": 0, + "use_STDOUT": 1 + }, + { "id": "n2", + "type": "EXEC", + "cmd": ["cat"], + "use_STDIN": 1, + "use_STDOUT": 1 + }, + { "id": "m", + "type": "EXEC", + "cmd": ["echo", "stuff"], + "use_STDIN": false, + "use_STDOUT": true + }, + { "id": "d", + "type": "EXEC", + "cmd": "diff __IN1__ __IN2__", + "use_STDIN": 0, + "use_STDOUT": 0 + } + ], + "edges":[ + { "from": "n1", + "to": "n2" + }, + { "from": "n2", + "to": "d:__IN1__" + }, + { "from": "m", + "to": "d:__INN2__" + } + ] +} diff --git a/t/data/simple_failing_io_pipeline2.vtf b/t/data/simple_failing_io_pipeline2.vtf new file mode 100644 index 000000000..91b9b683f --- /dev/null +++ b/t/data/simple_failing_io_pipeline2.vtf @@ -0,0 +1,40 @@ +{ + "description": "minimal failing test pipeline. Port names in edges match the node definitions, but are of the wrong type", + "nodes":[ + { "id": "n1", + "type": "EXEC", + "cmd": "echo stuff", + "use_STDIN": 0, + "use_STDOUT": 1 + }, + { "id": "n2", + "type": "EXEC", + "cmd": ["cat"], + "use_STDIN": 1, + "use_STDOUT": 1 + }, + { "id": "m", + "type": "EXEC", + "cmd": ["echo", "stuff"], + "use_STDIN": false, + "use_STDOUT": true + }, + { "id": "d", + "type": "EXEC", + "cmd": "diff __IN_1__ __OUT_2__", + "use_STDIN": 0, + "use_STDOUT": 0 + } + ], + "edges":[ + { "from": "n1", + "to": "n2" + }, + { "from": "n2", + "to": "d:__IN_1__" + }, + { "from": "m", + "to": "d:__OUT_2__" + } + ] +} diff --git a/t/data/simple_failing_pipeline.vtf b/t/data/simple_failing_pipeline.vtf new file mode 100644 index 000000000..dac3cb42d --- /dev/null +++ b/t/data/simple_failing_pipeline.vtf @@ -0,0 +1,33 @@ +{ + "description": "minimal failing test pipeline. Fails because of false in cmd. 
Also carps about poorly named __IN1__ and __IN2__ ports", + "nodes":[ + { "id": "n1", + "type": "EXEC", + "cmd": "echo stuff; sleep 1 && cat" + }, + { "id": "n2", + "type": "EXEC", + "ocmd": "cat; false", + "cmd": "head -1; false" + }, + { "id": "m", + "type": "EXEC", + "cmd": ["echo", "stuff"] + }, + { "id": "d", + "type": "EXEC", + "cmd": "diff __IN1__ __IN2__" + } + ], + "edges":[ + { "from": "n1", + "to": "n2" + }, + { "from": "n2", + "to": "d:__IN1__" + }, + { "from": "m", + "to": "d:__IN2__" + } + ] +} diff --git a/t/data/simple_pipeline.vtf b/t/data/simple_pipeline.vtf new file mode 100644 index 000000000..631f2835d --- /dev/null +++ b/t/data/simple_pipeline.vtf @@ -0,0 +1,40 @@ +{ + "description": "minimal test pipeline", + "nodes":[ + { "id": "n1", + "type": "EXEC", + "cmd": "echo stuff", + "use_STDIN": 0, + "use_STDOUT": 1 + }, + { "id": "n2", + "type": "EXEC", + "cmd": ["cat"], + "use_STDIN": 1, + "use_STDOUT": 1 + }, + { "id": "m", + "type": "EXEC", + "cmd": ["echo", "stuff"], + "use_STDIN": false, + "use_STDOUT": true + }, + { "id": "d", + "type": "EXEC", + "cmd": "diff __IN_1__ __IN_2__", + "use_STDIN": 0, + "use_STDOUT": 0 + } + ], + "edges":[ + { "from": "n1", + "to": "n2" + }, + { "from": "n2", + "to": "d:__IN_1__" + }, + { "from": "m", + "to": "d:__IN_2__" + } + ] +} From c9b64dac8a5c077591f247df3a271e621758cdf9 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 22:25:04 +0100 Subject: [PATCH 16/40] add tests for viv.pl --- Changes | 1 + t/50-viv.t | 24 ++++++++++++++++++++++++ t/data/simple_carping_pipeline1.vtf | 2 +- t/data/simple_failing_io_pipeline1.vtf | 8 ++++---- t/data/simple_failing_pipeline.vtf | 8 ++++---- 5 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 t/50-viv.t diff --git a/Changes b/Changes index 749f5e851..77a9aa8c0 100644 --- a/Changes +++ b/Changes @@ -10,6 +10,7 @@ CHANGES LOG subst directive attributes ifnull and required added remove dead code, review relevance of comments, general 
tidying more tests + - add tests for viv.pl release 0.17 - human split with no target align (secondary stage) template introduced diff --git a/t/50-viv.t b/t/50-viv.t new file mode 100644 index 000000000..8e09a5021 --- /dev/null +++ b/t/50-viv.t @@ -0,0 +1,24 @@ +use strict; +use warnings; +use Test::More tests => 11; +use Test::Cmd; +use Cwd; + +my$odir=getcwd(); + +my $test = Test::Cmd->new( prog => $odir.'/bin/viv.pl', workdir => q()); #, match_sub => sub{my($ret,$exp)=@_; return 0 ; $ret=~m/\Q$exp\E/smx} ); +ok($test, 'made test object'); +foreach( + ['simple_carping_pipeline1.vtf', 255, 'Node d has poorly described port __IN1__'], + ['simple_failing_io_pipeline1.vtf', 255, 'Node d has no port __IN_N2__'], + ['simple_failing_io_pipeline2.vtf', 255, 'Node d port __OUT_2__ connected as "to"'], + ['simple_failing_pipeline.vtf', 10, 'Exiting due to abnormal return from child n2'], + ['simple_pipeline.vtf', 0, '(viv) - Done'] +){ + my($vtf, $estatus, $eerror)=@$_; + my$exit_status = $test->run(chdir => $test->curdir, args => "-s -x $odir/t/data/$vtf"); + cmp_ok($exit_status>>8, q(==), $estatus, "expected exit status of $estatus for $vtf"); + like($test->stderr,qr(\Q$eerror\E)smx, "expected err info for $vtf"); +} + + diff --git a/t/data/simple_carping_pipeline1.vtf b/t/data/simple_carping_pipeline1.vtf index 28915b833..eec000ba8 100644 --- a/t/data/simple_carping_pipeline1.vtf +++ b/t/data/simple_carping_pipeline1.vtf @@ -1,5 +1,5 @@ { - "description": "minimal carping test pipeline - works, but I/O port names are poorly named. See log for warning", + "description": "minimal failing test pipeline - fails as I/O port names are poorly named", "nodes":[ { "id": "n1", "type": "EXEC", diff --git a/t/data/simple_failing_io_pipeline1.vtf b/t/data/simple_failing_io_pipeline1.vtf index 1b946d304..004de08d8 100644 --- a/t/data/simple_failing_io_pipeline1.vtf +++ b/t/data/simple_failing_io_pipeline1.vtf @@ -1,5 +1,5 @@ { - "description": "minimal failing test pipeline. 
Port name in edge does not match one in node definition. Also carps about poorly named __IN1__ and __INN2__ ports", + "description": "minimal failing test pipeline. Port name in edge does not match one in node definition", "nodes":[ { "id": "n1", "type": "EXEC", @@ -21,7 +21,7 @@ }, { "id": "d", "type": "EXEC", - "cmd": "diff __IN1__ __IN2__", + "cmd": "diff __IN_1__ __IN_2__", "use_STDIN": 0, "use_STDOUT": 0 } @@ -31,10 +31,10 @@ "to": "n2" }, { "from": "n2", - "to": "d:__IN1__" + "to": "d:__IN_1__" }, { "from": "m", - "to": "d:__INN2__" + "to": "d:__IN_N2__" } ] } diff --git a/t/data/simple_failing_pipeline.vtf b/t/data/simple_failing_pipeline.vtf index dac3cb42d..e64fb0c64 100644 --- a/t/data/simple_failing_pipeline.vtf +++ b/t/data/simple_failing_pipeline.vtf @@ -1,5 +1,5 @@ { - "description": "minimal failing test pipeline. Fails because of false in cmd. Also carps about poorly named __IN1__ and __IN2__ ports", + "description": "minimal failing test pipeline. Fails because of false in cmd", "nodes":[ { "id": "n1", "type": "EXEC", @@ -16,7 +16,7 @@ }, { "id": "d", "type": "EXEC", - "cmd": "diff __IN1__ __IN2__" + "cmd": "diff __IN_1__ __IN_2__" } ], "edges":[ @@ -24,10 +24,10 @@ "to": "n2" }, { "from": "n2", - "to": "d:__IN1__" + "to": "d:__IN_1__" }, { "from": "m", - "to": "d:__IN2__" + "to": "d:__IN_2__" } ] } From 6efa1319e898b757be30a30edec953aa4ed51f20 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 22:30:25 +0100 Subject: [PATCH 17/40] consistent naming for test data --- t/50-viv.t | 10 +++++----- ...g_pipeline1.vtf => 50-viv_failing_io_pipeline0.vtf} | 0 ...o_pipeline1.vtf => 50-viv_failing_io_pipeline1.vtf} | 0 ...o_pipeline2.vtf => 50-viv_failing_io_pipeline2.vtf} | 0 ...ailing_pipeline.vtf => 50-viv_failing_pipeline.vtf} | 0 t/data/{simple_pipeline.vtf => 50-viv_pipeline.vtf} | 0 6 files changed, 5 insertions(+), 5 deletions(-) rename t/data/{simple_carping_pipeline1.vtf => 50-viv_failing_io_pipeline0.vtf} (100%) rename 
t/data/{simple_failing_io_pipeline1.vtf => 50-viv_failing_io_pipeline1.vtf} (100%) rename t/data/{simple_failing_io_pipeline2.vtf => 50-viv_failing_io_pipeline2.vtf} (100%) rename t/data/{simple_failing_pipeline.vtf => 50-viv_failing_pipeline.vtf} (100%) rename t/data/{simple_pipeline.vtf => 50-viv_pipeline.vtf} (100%) diff --git a/t/50-viv.t b/t/50-viv.t index 8e09a5021..d06c75a2c 100644 --- a/t/50-viv.t +++ b/t/50-viv.t @@ -9,11 +9,11 @@ my$odir=getcwd(); my $test = Test::Cmd->new( prog => $odir.'/bin/viv.pl', workdir => q()); #, match_sub => sub{my($ret,$exp)=@_; return 0 ; $ret=~m/\Q$exp\E/smx} ); ok($test, 'made test object'); foreach( - ['simple_carping_pipeline1.vtf', 255, 'Node d has poorly described port __IN1__'], - ['simple_failing_io_pipeline1.vtf', 255, 'Node d has no port __IN_N2__'], - ['simple_failing_io_pipeline2.vtf', 255, 'Node d port __OUT_2__ connected as "to"'], - ['simple_failing_pipeline.vtf', 10, 'Exiting due to abnormal return from child n2'], - ['simple_pipeline.vtf', 0, '(viv) - Done'] + ['50-viv_failing_io_pipeline0.vtf', 255, 'Node d has poorly described port __IN1__'], + ['50-viv_failing_io_pipeline1.vtf', 255, 'Node d has no port __IN_N2__'], + ['50-viv_failing_io_pipeline2.vtf', 255, 'Node d port __OUT_2__ connected as "to"'], + ['50-viv_failing_pipeline.vtf', 10, 'Exiting due to abnormal return from child n2'], + ['50-viv_pipeline.vtf', 0, '(viv) - Done'] ){ my($vtf, $estatus, $eerror)=@$_; my$exit_status = $test->run(chdir => $test->curdir, args => "-s -x $odir/t/data/$vtf"); diff --git a/t/data/simple_carping_pipeline1.vtf b/t/data/50-viv_failing_io_pipeline0.vtf similarity index 100% rename from t/data/simple_carping_pipeline1.vtf rename to t/data/50-viv_failing_io_pipeline0.vtf diff --git a/t/data/simple_failing_io_pipeline1.vtf b/t/data/50-viv_failing_io_pipeline1.vtf similarity index 100% rename from t/data/simple_failing_io_pipeline1.vtf rename to t/data/50-viv_failing_io_pipeline1.vtf diff --git 
a/t/data/simple_failing_io_pipeline2.vtf b/t/data/50-viv_failing_io_pipeline2.vtf similarity index 100% rename from t/data/simple_failing_io_pipeline2.vtf rename to t/data/50-viv_failing_io_pipeline2.vtf diff --git a/t/data/simple_failing_pipeline.vtf b/t/data/50-viv_failing_pipeline.vtf similarity index 100% rename from t/data/simple_failing_pipeline.vtf rename to t/data/50-viv_failing_pipeline.vtf diff --git a/t/data/simple_pipeline.vtf b/t/data/50-viv_pipeline.vtf similarity index 100% rename from t/data/simple_pipeline.vtf rename to t/data/50-viv_pipeline.vtf From f56eda28863f884fa449be4112f3c82fc57166ff Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 22:46:00 +0100 Subject: [PATCH 18/40] refine build vs test dependencies --- Build.PL | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Build.PL b/Build.PL index 022f437c6..658e67adb 100644 --- a/Build.PL +++ b/Build.PL @@ -60,8 +60,12 @@ my $builder = $class->new( 'build_requires' => { 'English' => 0, + }, + 'test_requires' => { 'Test::More' => 0, 'Test::Compile' => 0, + 'Test::Cmd' => 0, + 'Perl6::Slurp' => 0, }, 'requires' => { From ec5020ae78600cb909cc3a890ccfe385740e7131 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 22:48:33 +0100 Subject: [PATCH 19/40] avoid polluting working directory when running tests --- t/10-vtfp-pv.t | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/10-vtfp-pv.t b/t/10-vtfp-pv.t index bafe6e845..602c414fc 100644 --- a/t/10-vtfp-pv.t +++ b/t/10-vtfp-pv.t @@ -6,10 +6,12 @@ use Test::Deep; use Perl6::Slurp; use Data::Dumper; use JSON; +use File::Temp qw(tempdir); +my $tdir = tempdir(CLEANUP => 1); my $template = q[t/data/10-vtfp-pv.json]; -my $pv_file = q[t/data/10-vtfp-pv.pv]; -my $processed_template = q[t/data/10-vtfp-pv-processed.json]; +my $pv_file = $tdir.q[/10-vtfp-pv.pv]; +my $processed_template = $tdir.q[/10-vtfp-pv-processed.json]; # just export and reimport parameter values for a template subtest 'pv0' => 
sub { From 3ac987cf222e818a7c8fe907a816599ffd39e634 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Fri, 25 Sep 2015 22:59:08 +0100 Subject: [PATCH 20/40] initial Travis-CI config --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..c8a567aff --- /dev/null +++ b/.travis.yml @@ -0,0 +1,4 @@ +sudo: false +language: perl +perl: + - "5.16" From f0c1c9be896fdcb3314d3a3329a2f98d99084fde Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Sat, 26 Sep 2015 17:35:32 +0100 Subject: [PATCH 21/40] Add Github Releases deployment from Travis --- .travis.yml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c8a567aff..28355ede4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,29 @@ sudo: false language: perl perl: - - "5.16" + - 5.16 + +env: + global: + - secure: "Yaiv+tTD2FnvN2UwA9yEVIzLQMXR7yhLkuzJ9521MNM354SqKJUFtMLbQfdA7Rld5npylBSl1xfUOwjiraDBvtbjnC6PKpaz2fyR8/ycGptMjbWncxjNiSIxYelmw/JD9qu74Z2yIKzty7O4WqJ1IbP3OrQd/aTYViekibl7hIo=" + +after_success: + - ./Build dist + - export DIST_FILE=$(ls p4-*.tar.gz) + - export MD5_FILE=$DIST_FILE.md5 + - md5sum $DIST_FILE > $MD5_FILE + - export SHA256_FILE=$DIST_FILE.sha256 + - shasum -a 256 $DIST_FILE > $SHA256_FILE + +deploy: + provider: releases + api-key: $GH_OAUTH + file: + - $DIST_FILE + - $MD5_FILE + - $SHA256_FILE + skip_cleanup: true + on: + perl: 5.16 + tags: true + all_branches: true From b27679c78d01bb4c151856a2bd36906a495c4d04 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 28 Sep 2015 17:17:57 +0100 Subject: [PATCH 22/40] amend port names to follow required IN/OUT convention make subst directives for es_alignment_reference_genome and hs_alignment_reference_genome required --- ...lit_extrasplit_notargetalign_template.json | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git 
a/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json index 685cbf02f..0dbb8a13c 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json @@ -278,7 +278,7 @@ { "id":"es_alignment_reference_genome", "type":"INFILE", - "name":{"subst":"es_alignment_reference_genome"}, + "name":{"subst":"es_alignment_reference_genome", "required":"yes"}, "description":"Prefix for reference fasta and Bowtie2 index files" }, { @@ -314,7 +314,7 @@ { "id":"hs_alignment_reference_genome", "type":"INFILE", - "name":{"subst":"hs_alignment_reference_genome"}, + "name":{"subst":"hs_alignment_reference_genome","required":"yes"}, "description":"Prefix for reference fasta and Bowtie2 index files" }, { @@ -374,7 +374,7 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_INBAM__", "IN=__HUMAN_SPLIT_INBAM__", "IN=__EXTRA_SPLIT_INBAM__", "OUT=__PHIX_OUTBAM__", "OUT=__HUMAN_SPLIT_OUTBAM__", "OUT=__EXTRA_SPLIT_OUTBAM__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS__"] + "cmd":[{"subst":"alignment_filter_cmd"}, "IN=__PHIX_BAM_IN__", "IN=__HUMAN_SPLIT_BAM_IN__", "IN=__EXTRA_SPLIT_BAM_IN__", "OUT=__PHIX_BAM_OUT__", "OUT=__HUMAN_SPLIT_BAM_OUT__", "OUT=__EXTRA_SPLIT_BAM_OUT__", "UNALIGNED=/dev/stdout", "METRICS_FILE=__AF_METRICS_OUT__"] }, { "id":"af_metrics", @@ -444,19 +444,19 @@ { "id":"esref_to_alignment", "from":"es_alignment_reference_genome", "to":"alignment_es:reference" }, { "id":"alignment_es_to_post_alignment_es", "from":"alignment_es", "to":"post_alignment_es" }, { "id":"reference_dict_es_to_post_alignment", "from":"reference_dict_es", "to":"post_alignment_es:reference_dict" }, - { "id":"postalnes_to_alignment_filter", "from":"post_alignment_es", 
"to":"alignment_filter:__EXTRA_SPLIT_INBAM__" }, + { "id":"postalnes_to_alignment_filter", "from":"post_alignment_es", "to":"alignment_filter:__EXTRA_SPLIT_BAM_IN__" }, { "id":"t0_to_prealnhs", "from":"tee0:__HUMAN_SPLIT_OUT__", "to":"pre_alignment_hs" }, { "id":"prealnhs_to_alnhs", "from":"pre_alignment_hs", "to":"alignment_hs" }, { "id":"hsref_to_alignment", "from":"hs_alignment_reference_genome", "to":"alignment_hs:reference" }, { "id":"alignment_hs_to_post_alignment_hs", "from":"alignment_hs", "to":"post_alignment_hs" }, { "id":"reference_dict_hs_to_post_alignment", "from":"reference_dict_hs", "to":"post_alignment_hs:reference_dict" }, - { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_INBAM__" }, - { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_INBAM__" }, - { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS__", "to":"af_metrics" }, + { "id":"postalnhs_to_alignment_filter", "from":"post_alignment_hs", "to":"alignment_filter:__HUMAN_SPLIT_BAM_IN__" }, + { "id":"iab_to_alignment_filter", "from":"initial_phix_aln_bam", "to":"alignment_filter:__PHIX_BAM_IN__" }, + { "id":"alignment_filter_to_metrics", "from":"alignment_filter:__AF_METRICS_OUT__", "to":"af_metrics" }, { "id":"af_to_fopt", "from":"alignment_filter", "to":"final_output_prep_target" }, - { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, - { "id":"af_to_fopes", "from":"alignment_filter:__EXTRA_SPLIT_OUTBAM__", "to":"final_output_prep_es" }, - { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_OUTBAM__", "to":"final_output_prep_hs" }, + { "id":"af_to_fopp", "from":"alignment_filter:__PHIX_BAM_OUT__", "to":"final_output_prep_phix" }, + { "id":"af_to_fopes", "from":"alignment_filter:__EXTRA_SPLIT_BAM_OUT__", "to":"final_output_prep_es" }, + { "id":"af_to_fophs", "from":"alignment_filter:__HUMAN_SPLIT_BAM_OUT__", 
"to":"final_output_prep_hs" }, { "id":"src_bam_to_seqchksum", "from":"src_bam", "to":"seqchksum" }, { "id":"fopt_to_bam", "from":"final_output_prep_target", "to":"seqchksum:target_seqchksum" }, { "id":"fopp_to_bam_phix", "from":"final_output_prep_phix", "to":"seqchksum:phix_seqchksum" }, From 2b18e4f710450d8741b0297190dbca2c25118ee6 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 29 Sep 2015 10:04:33 +0100 Subject: [PATCH 23/40] Set compression level when using scramble 0 for sam to bam conversion of aligner output 7 for final cram file product --- data/vtlib/bwa_aln_alignment.json | 2 +- data/vtlib/bwa_aln_se_alignment.json | 2 +- data/vtlib/bwa_mem_alignment.json | 2 +- data/vtlib/final_output_noalign_prep.json | 2 +- data/vtlib/final_output_prep.json | 2 +- data/vtlib/merge_final_output_prep.json | 1 + 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index e6a129532..9b1e1d4be 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -59,7 +59,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", "-I", "sam", "-O", "bam" ] + "cmd":[ "scramble", "-0", "-I", "sam", "-O", "bam" ] } ], "edges":[ diff --git a/data/vtlib/bwa_aln_se_alignment.json b/data/vtlib/bwa_aln_se_alignment.json index 008613f91..985f38e80 100644 --- a/data/vtlib/bwa_aln_se_alignment.json +++ b/data/vtlib/bwa_aln_se_alignment.json @@ -47,7 +47,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":"scramble -I sam -O bam" + "cmd":[ "scramble", "-0", "-I", "sam", "-O", "bam" ] } ], "edges":[ diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index 52dad88b5..fb4e0a954 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -43,7 +43,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":["scramble", "-I", "sam", "-O", "bam"] + "cmd":["scramble", "-0", "-I", "sam", "-O", "bam"] } ], "edges":[ diff --git 
a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index 2e5f6a448..3f28d3aa4 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ b/data/vtlib/final_output_noalign_prep.json @@ -176,7 +176,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ {"subst":"scramble_executable"}, "-I", "bam", "-O", "cram" ] + "cmd": [ {"subst":"scramble_executable"}, "-7", "-I", "bam", "-O", "cram" ] }, { "id":"scramble_tee", diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 9cd03a228..eb45b0820 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -217,7 +217,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ {"subst":"scramble_executable"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd": [ {"subst":"scramble_executable"}, "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index e7046704c..b6e639553 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -32,6 +32,7 @@ "subst_constructor":{ "vals":[ {"subst":"scramble_executable"}, + "-7", "-I", "bam", "-O", From 1b0514693ff1ae56d509dc9b300ef8eab7785020 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 29 Sep 2015 10:39:05 +0100 Subject: [PATCH 24/40] Remove unused scramble_executable substitution --- data/vtlib/alignment_common.json | 1 - data/vtlib/final_output_noalign_prep.json | 2 +- data/vtlib/final_output_prep.json | 2 +- data/vtlib/merge_final_output_prep.json | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data/vtlib/alignment_common.json b/data/vtlib/alignment_common.json index 345addb38..5b5db9eaa 100644 --- a/data/vtlib/alignment_common.json +++ b/data/vtlib/alignment_common.json @@ -13,7 +13,6 @@ 
{"id":"reposdir","required":"no","default":"."}, {"id":"samtools_executable","required":"no","default":"samtools"}, {"id":"bwa_executable","required":"no","default":"bwa0_6"}, - {"id":"scramble_executable","required":"no","default":"scramble"}, {"id":"aligner_numthreads","required":"no","default":"2"}, {"id":"bam_ext","required":"no","default":".bam"}, {"id":"cram_ext","required":"no","default":".cram"}, diff --git a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index 3f28d3aa4..df6bfbdd9 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ b/data/vtlib/final_output_noalign_prep.json @@ -176,7 +176,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ {"subst":"scramble_executable"}, "-7", "-I", "bam", "-O", "cram" ] + "cmd": [ "scramble", "-7", "-I", "bam", "-O", "cram" ] }, { "id":"scramble_tee", diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index eb45b0820..62fbe53ae 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -217,7 +217,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ {"subst":"scramble_executable"}, "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd": [ "scramble", "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index b6e639553..703d466f1 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -31,7 +31,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - {"subst":"scramble_executable"}, + "scramble", "-7", "-I", "bam", From 64f1ddd985217bf88837b43f231af7ee4e8220a3 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 29 Sep 2015 10:47:45 +0100 Subject: [PATCH 25/40] flatten scramble substitution into command --- data/vtlib/merge_final_output_prep.json | 18 +----------------- 
1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 703d466f1..69577a4d6 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -26,22 +26,6 @@ "subst_constructor":{ "vals":[ "-r", {"subst":"scramble_reference_fasta"} ] } }, - { - "id":"scramble_cmd", - "required":"yes", - "subst_constructor":{ - "vals":[ - "scramble", - "-7", - "-I", - "bam", - "-O", - "cram", - {"subst":"scramble_reference_flag"} - ], - "postproc":{"op":"pack"} - } - }, {"id":"stats_filter__F0x900","required":"no","default":"0x900"}, { "id":"samtools_stats_F0x900", @@ -215,7 +199,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":{"subst":"scramble_cmd"} + "cmd":[ "scramble", "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", From 8e3d7d76da97e6fd0d9b3effcf080eb4249a343b Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 29 Sep 2015 14:53:14 +0100 Subject: [PATCH 26/40] Note change to compression levels --- Changes | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Changes b/Changes index 77a9aa8c0..4aa8fa1f5 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,9 @@ CHANGES LOG ----------- + - scramble compression + - up to 7 for final output cram files + - down to 0 for internal bam streams - port naming conventions (IN/OUT pre- and postfixes) adopted in templates and enforced in viv.pl - vtfp.pl improved error reporting From 0c0c6645025b6a52dc95fd29bbba0a10ed998ea4 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 8 Oct 2015 11:48:35 +0100 Subject: [PATCH 27/40] Parameterised some flags teepot - teepot_vflag (verbosity level) scramble - s2b_compress_level (samtobam compression in alignment templates), b2c_compress_level (bamtocrqam compression in final_output_prep templates) --- ...age2_humansplit_extrasplit_notargetalign_template.json | 2 +- 
...ent_wtsi_stage2_humansplit_notargetalign_template.json | 2 +- data/vtlib/alignment_wtsi_stage2_humansplit_template.json | 2 +- data/vtlib/alignment_wtsi_stage2_template.json | 2 +- data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json | 6 +++--- data/vtlib/bwa_aln_alignment.json | 4 ++-- data/vtlib/bwa_aln_se_alignment.json | 4 ++-- data/vtlib/bwa_mem_alignment.json | 2 +- data/vtlib/final_output_noalign_prep.json | 8 ++++---- data/vtlib/final_output_prep.json | 8 ++++---- data/vtlib/merge_final_output_prep.json | 8 ++++---- data/vtlib/post_alignment.json | 4 ++-- data/vtlib/realignment_wtsi_template.json | 2 +- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json index 0dbb8a13c..daac9341b 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json @@ -266,7 +266,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__", "__EXTRA_SPLIT_OUT__" ] + "cmd":["teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__", "__EXTRA_SPLIT_OUT__" ] }, { "id":"pre_alignment_es", diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json index 082dbcaaa..7b4c7318f 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json @@ 
-199,7 +199,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] }, { "id":"pre_alignment_hs", diff --git a/data/vtlib/alignment_wtsi_stage2_humansplit_template.json b/data/vtlib/alignment_wtsi_stage2_humansplit_template.json index 6dcdbfa3c..3b3064b19 100644 --- a/data/vtlib/alignment_wtsi_stage2_humansplit_template.json +++ b/data/vtlib/alignment_wtsi_stage2_humansplit_template.json @@ -235,7 +235,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__", "__HUMAN_SPLIT_OUT__" ] }, { "id":"pre_alignment_target", diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 2fa3d0dab..edcd46e0b 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -190,7 +190,7 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"large wait (500 minutes) to avoid unnecessary spill to disk; specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ] + "cmd":[ "teepot", 
{"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "-m", "1G", "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ] }, { "id":"pre_alignment_target", diff --git a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json index 9355a1775..9c406d1d7 100644 --- a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +++ b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json @@ -306,7 +306,7 @@ { "id":"tee_decode", "type":"EXEC", - "cmd":[ "teepot", "-v", "-m", "2M", "__TD1_OUT__", "__TD2_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-m", "2M", "__TD1_OUT__", "__TD2_OUT__" ] }, { "id":"simple_cat", @@ -337,7 +337,7 @@ { "id":"prefilter", "type":"EXEC", - "cmd":[ "teepot", "-v", "-t", ".", "-m", "2M", "__PF1_OUT__", "__PF2_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-t", ".", "-m", "2M", "__PF1_OUT__", "__PF2_OUT__" ] }, { "id":"cat2", @@ -368,7 +368,7 @@ { "id":"tee_split", "type":"EXEC", - "cmd":[ "teepot", "-v", "-m", "5M __FILTERED_BAM_OUT__", "__SPLIT_BAM_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-m", "5M __FILTERED_BAM_OUT__", "__SPLIT_BAM_OUT__" ] }, { "id":"filtered_bam", diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index 9b1e1d4be..cddf3ed31 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -25,7 +25,7 @@ { "id":"tee4", "type":"EXEC", - "cmd":[ "teepot", "-v", "-w", "300", "-m", "1G", "__ALN_1_OUT__", "__ALN_2_OUT__", "__SAMPE_1_OUT__", "__SAMPE_2_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-w", "300", "-m", "1G", "__ALN_1_OUT__", "__ALN_2_OUT__", "__SAMPE_1_OUT__", "__SAMPE_2_OUT__" ] }, { "id":"bwa_aln_1", @@ -59,7 +59,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", "-0", "-I", "sam", "-O", "bam" ] + "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", 
"sam", "-O", "bam" ] } ], "edges":[ diff --git a/data/vtlib/bwa_aln_se_alignment.json b/data/vtlib/bwa_aln_se_alignment.json index 985f38e80..ba7758c39 100644 --- a/data/vtlib/bwa_aln_se_alignment.json +++ b/data/vtlib/bwa_aln_se_alignment.json @@ -25,7 +25,7 @@ { "id":"tee2", "type":"EXEC", - "cmd":[ "teepot", "-v", "-w", "300", "-m", "1G", "__ALN_OUT__", "__SAMPE_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-w", "300", "-m", "1G", "__ALN_OUT__", "__SAMPE_OUT__" ] }, { "id":"bwa_aln", @@ -47,7 +47,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", "-0", "-I", "sam", "-O", "bam" ] + "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" ] } ], "edges":[ diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index fb4e0a954..f82eb3b5a 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -43,7 +43,7 @@ { "id":"samtobam", "type":"EXEC", - "cmd":["scramble", "-0", "-I", "sam", "-O", "bam"] + "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" ] } ], "edges":[ diff --git a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index df6bfbdd9..b6547e033 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ b/data/vtlib/final_output_noalign_prep.json @@ -169,21 +169,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", 
"__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ "scramble", "-7", "-I", "bam", "-O", "cram" ] + "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram" ] }, { "id":"scramble_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -241,7 +241,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 62fbe53ae..9d97ce6d4 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -210,21 +210,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", 
"__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ "scramble", "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -289,7 +289,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 69577a4d6..366dd4a05 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -192,21 +192,21 @@ "use_STDIN": true, "use_STDOUT": 
false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":[ "scramble", "-7", "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__CRAM_OUT__", "__CRAI_OUT__", "__MD5_OUT__", "__SEQCHKSUM_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -274,7 +274,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { diff --git 
a/data/vtlib/post_alignment.json b/data/vtlib/post_alignment.json index 1789d542e..6028b5306 100644 --- a/data/vtlib/post_alignment.json +++ b/data/vtlib/post_alignment.json @@ -28,7 +28,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "-m", "5M", "__HEADER_FIX_OUT__", "__FULL_BAM_OUT__"], + "cmd":["teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "-m", "5M", "__HEADER_FIX_OUT__", "__FULL_BAM_OUT__"], "comment":"get deadlock when tee used here; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -52,7 +52,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":[ "teepot", "-v", "-m", "5M", "-" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-m", "5M", "-" ] }, { "id":"reheader_headerSQfix", diff --git a/data/vtlib/realignment_wtsi_template.json b/data/vtlib/realignment_wtsi_template.json index dd4cd4f81..a71816159 100644 --- a/data/vtlib/realignment_wtsi_template.json +++ b/data/vtlib/realignment_wtsi_template.json @@ -194,7 +194,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ], + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "__PHIX_ALN_OUT__", "__TGT_ALN_OUT__" ], "comment":"specify parameter value teepot_tempdir_value to specif teepot tempdir" }, { From 301d2b38f7780fe7ae8437f20551903c22a8f965 Mon Sep 17 00:00:00 2001 From: Jillian Durham Date: Thu, 8 Oct 2015 15:35:49 +0100 Subject: [PATCH 28/40] bammerge compression level changed to uncompressed --- data/vtlib/merge_aligned.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/vtlib/merge_aligned.json b/data/vtlib/merge_aligned.json index 66580c6c3..a33c42808 100644 --- a/data/vtlib/merge_aligned.json +++ b/data/vtlib/merge_aligned.json @@ -80,7 +80,7 @@ 
"use_STDIN": false, "use_STDOUT": true, "orig_cmd":{"subst":"crammerge"}, - "cmd": [ "bammerge", "SO=coordinate", "inputformat=cram", "outputformat=bam", {"subst":"incrams"} ], + "cmd": [ "bammerge", "level=0", "SO=coordinate", "inputformat=cram", "outputformat=bam", {"subst":"incrams"} ], "description":"merge individual cram files from a sample into one cram file" }, { From d052c4c3237ff1f2eec93554fb3cfee6278850db Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 9 Oct 2015 17:03:34 +0100 Subject: [PATCH 29/40] corrected splice in finalise_cmd() --- bin/vtfp.pl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/vtfp.pl b/bin/vtfp.pl index 44c9f28dc..344b07bf1 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -1174,11 +1174,13 @@ sub finalise_cmd { for my $i (reverse (0..$#{$cmd})) { if(not defined $cmd->[$i]) { - splice @{$cmd->[$i]}, $i, 1; +# splice @{$cmd->[$i]}, $i, 1; + splice @{$cmd}, $i, 1; } elsif(ref $cmd->[$i] eq q[ARRAY]) { $cmd->[$i] = finalise_cmd($cmd->[$i]); - splice @{$cmd->[$i]}, $i, 1, @{$cmd->[$i]}; +# splice @{$cmd->[$i]}, $i, 1, @{$cmd->[$i]}; + splice @{$cmd}, $i, 1, @{$cmd->[$i]}; } } } From bc58924b2764399f7ecf5c2333a3d008dd77d94d Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 12 Oct 2015 10:07:59 +0100 Subject: [PATCH 30/40] Add parameters for scramble multi-threading and cram version flags --- data/vtlib/bwa_aln_alignment.json | 9 ++++++++- data/vtlib/bwa_aln_se_alignment.json | 9 ++++++++- data/vtlib/bwa_mem_alignment.json | 9 ++++++++- data/vtlib/final_output_noalign_prep.json | 11 +++++++++-- data/vtlib/final_output_prep.json | 11 +++++++++-- data/vtlib/merge_final_output_prep.json | 12 ++++++++++-- data/vtlib/post_alignment.json | 2 +- 7 files changed, 53 insertions(+), 10 deletions(-) diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index cddf3ed31..df6f58410 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -59,7 +59,14 @@ { 
"id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" ] + "cmd":[ + "scramble", + {"subst":"s2b_compress_level", "ifnull":"-0"}, + {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}}, + {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, + "-I", "sam", + "-O", "bam" + ] } ], "edges":[ diff --git a/data/vtlib/bwa_aln_se_alignment.json b/data/vtlib/bwa_aln_se_alignment.json index ba7758c39..6e2193485 100644 --- a/data/vtlib/bwa_aln_se_alignment.json +++ b/data/vtlib/bwa_aln_se_alignment.json @@ -47,7 +47,14 @@ { "id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" ] + "cmd":[ + "scramble", + {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}}, + {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, + {"subst":"s2b_compress_level", "ifnull":"-0"}, + "-I", "sam", + "-O", "bam" + ] } ], "edges":[ diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index f82eb3b5a..f08044f46 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -43,7 +43,14 @@ { "id":"samtobam", "type":"EXEC", - "cmd":[ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" ] + "cmd":[ + "scramble", + {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}}, + {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, + {"subst":"s2b_compress_level", "ifnull":"-0"}, + "-I", "sam", + "-O", "bam" + ] } ], "edges":[ diff --git a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index b6547e033..273d8dd9d 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ 
b/data/vtlib/final_output_noalign_prep.json @@ -169,14 +169,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", {"subst":"fomw_teepot_wval", "ifnull":"7200"}, "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram" ] + "cmd":[ + "scramble", + {"subst":"b2c_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"b2c_mt_val"} ]}}}, + {"subst":"b2c_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"b2c_format_version"} ]}}}, + {"subst":"b2c_compress_level", "ifnull":"-7"}, + "-I", "bam", + "-O", "cram" + ] }, { "id":"scramble_tee", diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 9d97ce6d4..29b9f4a5d 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -210,14 +210,21 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", 
"__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", {"subst":"fomw_teepot_wval", "ifnull":"300"}, "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__CALIBRATION_PU_OUT__", "__BAM_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd":[ + "scramble", + {"subst":"b2c_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"b2c_mt_val"} ]}}}, + {"subst":"b2c_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"b2c_format_version"} ]}}}, + {"subst":"b2c_compress_level", "ifnull":"-7"}, + "-I", "bam", + "-O", "cram", + {"subst":"scramble_reference_flag"} ] }, { "id":"scramble_tee", diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 366dd4a05..cddb9114c 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -192,14 +192,22 @@ "use_STDIN": true, "use_STDOUT": false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", {"subst":"fomw_teepot_wval", "ifnull":"7200"}, "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", "type":"EXEC", 
"use_STDIN": true, "use_STDOUT": true, - "cmd":[ "scramble", {"subst":"b2c_compress_level", "ifnull":"-7"}, "-I", "bam", "-O", "cram", {"subst":"scramble_reference_flag"} ] + "cmd":[ + "scramble", + {"subst":"b2c_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"b2c_mt_val"} ]}}}, + {"subst":"b2c_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"b2c_format_version"} ]}}}, + {"subst":"b2c_compress_level", "ifnull":"-7"}, + "-I", "bam", + "-O", "cram", + {"subst":"scramble_reference_flag"} + ] }, { "id":"scramble_tee", diff --git a/data/vtlib/post_alignment.json b/data/vtlib/post_alignment.json index 6028b5306..71381138f 100644 --- a/data/vtlib/post_alignment.json +++ b/data/vtlib/post_alignment.json @@ -28,7 +28,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":["teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "-m", "5M", "__HEADER_FIX_OUT__", "__FULL_BAM_OUT__"], + "cmd":["teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "300", "-m", {"subst":"SQhdr_teepot_mval", "ifnull":"100M"}, "__HEADER_FIX_OUT__", "__FULL_BAM_OUT__"], "comment":"get deadlock when tee used here; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { From 81e947b924af0e15214523f006a57b22cb0c5f0e Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 16 Oct 2015 15:15:11 +0100 Subject: [PATCH 31/40] remove commented code --- bin/vtfp.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/vtfp.pl b/bin/vtfp.pl index 344b07bf1..fc34fea10 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -1174,12 +1174,10 @@ sub finalise_cmd { for my $i (reverse (0..$#{$cmd})) { if(not defined $cmd->[$i]) { -# splice @{$cmd->[$i]}, $i, 1; splice @{$cmd}, $i, 1; } elsif(ref $cmd->[$i] eq q[ARRAY]) { $cmd->[$i] = finalise_cmd($cmd->[$i]); -# splice @{$cmd->[$i]}, $i, 1, @{$cmd->[$i]}; splice @{$cmd}, $i, 1, @{$cmd->[$i]}; } } From 
04ecc4e745c5aafccab7629af0c93bea2b22b32e Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 16 Oct 2015 14:50:42 +0100 Subject: [PATCH 32/40] add bamseqchksum RAFILE for cram and change seqchksum_file from OUTFILE to RAFILE and move it between seqchksumtee and cmp_seqchksum. This should prevent blocking problems leading to teepot spillage. --- data/vtlib/final_output_noalign_prep.json | 17 ++++++++++++++--- data/vtlib/final_output_prep.json | 17 ++++++++++++++--- data/vtlib/merge_final_output_prep.json | 17 ++++++++++++++--- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/data/vtlib/final_output_noalign_prep.json b/data/vtlib/final_output_noalign_prep.json index df6bfbdd9..49ad107f1 100644 --- a/data/vtlib/final_output_noalign_prep.json +++ b/data/vtlib/final_output_noalign_prep.json @@ -114,6 +114,15 @@ "postproc":{"op":"concat", "pad":""} } }, + { + "id":"seqchksum_file_cram", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"fopid"}, ".cram.seqchksum" ], + "postproc":{"op":"concat", "pad":""} + }, + "comment":"this temporary file is used for removing blocking problems at cmp_seqchksum" + }, { "id":"seqchksum_extrahash_file", "required":"yes", @@ -241,7 +250,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -264,6 +273,7 @@ { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, { "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} }, { "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} }, + { "id":"seqchksum_file_cram", "type":"RAFILE", 
"name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, { "id":"seqchksum_extrahash_file", "type":"OUTFILE", "name":{"subst":"seqchksum_extrahash_file"} }, { "id":"bamcheck_file", "type":"OUTFILE", "name":{"subst":"bamcheck_file"} }, { "id":"stats_F0x900_file", "type":"OUTFILE", "name":{"subst":"stats_F0x900_file"} }, @@ -313,11 +323,12 @@ { "id":"bamcheck_to_file", "from":"bamcheck", "to":"bamcheck_file" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_tee" }, { "id":"scs_tee_to_file", "from":"seqchksum_tee:__FILE_OUT__", "to":"seqchksum_file" }, - { "id":"scs_tee_to_cmp", "from":"seqchksum_tee:__SEQCHKSUM_OUT__", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, + { "id":"scs_file_to_cmp", "from":"seqchksum_file", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, { "id":"scs_extrahash_to_file", "from":"seqchksum_extrahash", "to":"seqchksum_extrahash_file" }, { "id":"samtools_stats_F0x900_to_file", "from":"samtools_stats_F0x900", "to":"stats_F0x900_file" }, { "id":"samtools_stats_F0xB00_to_file", "from":"samtools_stats_F0xB00", "to":"stats_F0xB00_file" }, { "id":"flagstat_to_file", "from":"flagstat", "to":"flagstat_file" }, - { "id":"cscs_to_cmp", "from":"cram_seqchksum", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } + { "id":"cscs_to_file", "from":"cram_seqchksum", "to":"seqchksum_file_cram" }, + { "id":"cscs_file_to_cmp", "from":"seqchksum_file_cram", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } ] } diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index 62fbe53ae..636685d03 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -148,6 +148,15 @@ "postproc":{"op":"concat", "pad":""} } }, + { + "id":"seqchksum_file_cram", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"fopid"}, ".cram.seqchksum" ], + "postproc":{"op":"concat", "pad":""} + }, + "comment":"this temporary file is 
used for removing blocking problems at cmp_seqchksum" + }, { "id":"seqchksum_extrahash_file", "required":"yes", @@ -289,7 +298,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -319,6 +328,7 @@ { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, { "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} }, { "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} }, + { "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, { "id":"seqchksum_extrahash_file", "type":"OUTFILE", "name":{"subst":"seqchksum_extrahash_file"} }, { "id":"bamcheck_file", "type":"OUTFILE", "name":{"subst":"bamcheck_file"} }, { "id":"stats_F0x900_file", "type":"OUTFILE", "name":{"subst":"stats_F0x900_file"} }, @@ -371,11 +381,12 @@ { "id":"bamcheck_to_file", "from":"bamcheck", "to":"bamcheck_file" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_tee" }, { "id":"scs_tee_to_file", "from":"seqchksum_tee:__FILE_OUT__", "to":"seqchksum_file" }, - { "id":"scs_tee_to_cmp", "from":"seqchksum_tee:__SEQCHKSUM_OUT__", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, + { "id":"scs_file_to_cmp", "from":"seqchksum_file", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, { "id":"scs_extrahash_to_file", "from":"seqchksum_extrahash", "to":"seqchksum_extrahash_file" }, { "id":"samtools_stats_F0x900_to_file", "from":"samtools_stats_F0x900", "to":"stats_F0x900_file" }, { "id":"samtools_stats_F0xB00_to_file", "from":"samtools_stats_F0xB00", 
"to":"stats_F0xB00_file" }, { "id":"flagstat_to_file", "from":"flagstat", "to":"flagstat_file" }, - { "id":"cscs_to_cmp", "from":"cram_seqchksum", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } + { "id":"cscs_to_file", "from":"cram_seqchksum", "to":"seqchksum_file_cram" }, + { "id":"cscsfile_to_cmp", "from":"seqchksum_file_cram", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } ] } diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 69577a4d6..723b230de 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -138,6 +138,15 @@ "postproc":{"op":"concat", "pad":""} } }, + { + "id":"seqchksum_file_cram", + "required":"yes", + "subst_constructor":{ + "vals":[ {"subst":"tmpdir"}, "/", {"subst":"library"}, ".cram.seqchksum" ], + "postproc":{"op":"concat", "pad":""} + }, + "comment":"this temporary file is used for removing blocking problems at cmp_seqchksum" + }, { "id":"seqchksum_extrahash_file", "required":"yes", @@ -274,7 +283,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__SEQCHKSUM_OUT__", "__FINAL_OUT__" ], + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, { @@ -289,6 +298,7 @@ { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, { "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} }, { "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} }, + { "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, { "id":"seqchksum_extrahash_file", "type":"OUTFILE", "name":{"subst":"seqchksum_extrahash_file"} }, { 
"id":"bamcheck_file", "type":"OUTFILE", "name":{"subst":"bamcheck_file"} }, { "id":"stats_F0x900_file", "type":"OUTFILE", "name":{"subst":"stats_F0x900_file"} }, @@ -338,11 +348,12 @@ { "id":"bamcheck_to_file", "from":"bamcheck", "to":"bamcheck_file" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_tee" }, { "id":"scs_tee_to_file", "from":"seqchksum_tee:__FILE_OUT__", "to":"seqchksum_file" }, - { "id":"scs_tee_to_cmp", "from":"seqchksum_tee:__SEQCHKSUM_OUT__", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, + { "id":"scs_file_to_cmp", "from":"seqchksum_file", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, { "id":"scs_extrahash_to_file", "from":"seqchksum_extrahash", "to":"seqchksum_extrahash_file" }, { "id":"samtools_stats_F0x900_to_file", "from":"samtools_stats_F0x900", "to":"stats_F0x900_file" }, { "id":"samtools_stats_F0xB00_to_file", "from":"samtools_stats_F0xB00", "to":"stats_F0xB00_file" }, { "id":"flagstat_to_file", "from":"flagstat", "to":"flagstat_file" }, - { "id":"cscs_to_cmp", "from":"cram_seqchksum", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } + { "id":"cscs_to_file", "from":"cram_seqchksum", "to":"seqchksum_file_cram" }, + { "id":"cscs_file_to_cmp", "from":"seqchksum_file_cram", "to":"cmp_seqchksum:__CRAM_SEQCHKSUM_IN__" } ] } From 8b2cb7ec05553461d87df9198ccafefb853e9fdc Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 19 Oct 2015 11:42:36 +0100 Subject: [PATCH 33/40] remove -V (cram format version) flag from scramble commands where output is BAM --- data/vtlib/bwa_aln_alignment.json | 1 - data/vtlib/bwa_aln_se_alignment.json | 1 - data/vtlib/bwa_mem_alignment.json | 1 - 3 files changed, 3 deletions(-) diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index df6f58410..1877bc876 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -63,7 +63,6 @@ "scramble", {"subst":"s2b_compress_level", "ifnull":"-0"}, {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", 
{"subst":"s2b_mt_val"} ]}}}, - {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, "-I", "sam", "-O", "bam" ] diff --git a/data/vtlib/bwa_aln_se_alignment.json b/data/vtlib/bwa_aln_se_alignment.json index 6e2193485..b37bb4030 100644 --- a/data/vtlib/bwa_aln_se_alignment.json +++ b/data/vtlib/bwa_aln_se_alignment.json @@ -50,7 +50,6 @@ "cmd":[ "scramble", {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}}, - {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index f08044f46..309327f02 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -46,7 +46,6 @@ "cmd":[ "scramble", {"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}}, - {"subst":"s2b_fmtver", "ifnull":{"subst_constructor":{ "vals":[ "-V", {"subst":"s2b_format_version"} ]}}}, {"subst":"s2b_compress_level", "ifnull":"-0"}, "-I", "sam", "-O", "bam" From f9bb03f37a25209fad67cb77d1783d52b7accc63 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Mon, 19 Oct 2015 15:01:04 +0100 Subject: [PATCH 34/40] Fully split arguments to a teepot one had been missed --- data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json index 9c406d1d7..9049b162f 100644 --- a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +++ b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json @@ -368,7 +368,7 @@ { "id":"tee_split", "type":"EXEC", - "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-m", "5M __FILTERED_BAM_OUT__", "__SPLIT_BAM_OUT__" ] + "cmd":[ 
"teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, "-m", "5M", "__FILTERED_BAM_OUT__", "__SPLIT_BAM_OUT__" ] }, { "id":"filtered_bam", From d8ec5a3bbd0862750e5165647fa63ecf0e8bb97e Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Mon, 19 Oct 2015 17:52:26 +0100 Subject: [PATCH 35/40] remove extra cmd attribute from seqchksum_tee node --- data/vtlib/merge_final_output_prep.json | 1 - 1 file changed, 1 deletion(-) diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index c5a54a377..65c9c38a6 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -291,7 +291,6 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__FINAL_OUT__" ], "cmd":[ "teepot", {"subst":"teepot_vflag", "ifnull":"-v"}, {"subst":"teepot_tempdir_flag"}, "-w", "30000", "__FILE_OUT__", "__FINAL_OUT__" ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, From a0a9e1450daf80653db45607efcea7f39450ec1d Mon Sep 17 00:00:00 2001 From: Jillian Durham Date: Mon, 19 Oct 2015 17:55:06 +0100 Subject: [PATCH 36/40] bamcheck removed --- data/vtlib/merge_final_output_prep.json | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/data/vtlib/merge_final_output_prep.json b/data/vtlib/merge_final_output_prep.json index 723b230de..f48e16f44 100644 --- a/data/vtlib/merge_final_output_prep.json +++ b/data/vtlib/merge_final_output_prep.json @@ -163,14 +163,6 @@ }, "comment":"default hash type is currently sha512primesums512" }, - { - "id":"bamcheck_file", - "required":"yes", - "subst_constructor":{ - "vals":[ {"subst":"outdatadir"}, "/", {"subst":"library"}, ".bamcheck" ], - "postproc":{"op":"concat", "pad":""} - } - }, { "id":"stats_F0x900_file", "subst_constructor":{ @@ -201,7 +193,7 @@ "use_STDIN": true, "use_STDOUT": 
false, "comment":"specify parameter value teepot_tempdir_value to specify teepot tempdir", - "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__BAMCHECK_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] + "cmd":[ "teepot", "-v", {"subst":"teepot_tempdir_flag"}, "-w", "300", "__SCRAMBLE_OUT__", "__FLAGSTAT_OUT__", "__SAMTOOLS_STATS_F0x900_OUT__", "__SAMTOOLS_STATS_F0xB00_OUT__", "__SEQCHKSUM_OUT__", "__SEQCHKSUM_EXTRAHASH_OUT__" ] }, { "id":"scramble", @@ -247,13 +239,6 @@ "use_STDOUT": true, "cmd":[ "bamseqchksum", "inputformat=cram" ] }, - { - "id":"bamcheck", - "type":"EXEC", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":[ "bamcheck", "-F", "0x900" ] - }, { "id":"samtools_stats_F0x900", "type":"EXEC", @@ -300,7 +285,6 @@ { "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} }, { "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, { "id":"seqchksum_extrahash_file", "type":"OUTFILE", "name":{"subst":"seqchksum_extrahash_file"} }, - { "id":"bamcheck_file", "type":"OUTFILE", "name":{"subst":"bamcheck_file"} }, { "id":"stats_F0x900_file", "type":"OUTFILE", "name":{"subst":"stats_F0x900_file"} }, { "id":"stats_F0xB00_file", "type":"OUTFILE", "name":{"subst":"stats_F0xB00_file"} }, { "id":"flagstat_file", "type":"OUTFILE", "name":{"subst":"flagstat_file"} }, @@ -336,7 +320,6 @@ { "id":"scramble_tee_to_crai", "from":"scramble_tee:__CRAI_OUT__", "to":"cram_index" }, { "id":"scramble_tee_to_bscs", "from":"scramble_tee:__SEQCHKSUM_OUT__", "to":"cram_seqchksum" }, { "id":"md5_to_postprocess", "from":"scramble_md5", "to":"postprocess_md5" }, - { "id":"bmdmw_to_bamcheck", "from":"bmd_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, { "id":"bmdmw_to_sts_F0x900", 
"from":"bmd_multiway:__SAMTOOLS_STATS_F0x900_OUT__", "to":"samtools_stats_F0x900" }, { "id":"bmdmw_to_sts_F0xB00", "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00_OUT__", "to":"samtools_stats_F0xB00" }, { "id":"bmdmw_to_seqchksum", "from":"bmd_multiway:__SEQCHKSUM_OUT__", "to":"seqchksum" }, @@ -345,7 +328,6 @@ { "id":"flagstat_filter_to_flagstat", "from":"flagstat_filter", "to":"flagstat" }, { "id":"tee_to_cram", "from":"scramble_tee:__CRAM_OUT__", "to":"cram_file" }, { "id":"corrected_md5_out", "from":"postprocess_md5", "to":"cram_md5" }, - { "id":"bamcheck_to_file", "from":"bamcheck", "to":"bamcheck_file" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_tee" }, { "id":"scs_tee_to_file", "from":"seqchksum_tee:__FILE_OUT__", "to":"seqchksum_file" }, { "id":"scs_file_to_cmp", "from":"seqchksum_file", "to":"cmp_seqchksum:__BAM_SEQCHKSUM_IN__" }, From 2aaa5a328522354281ef0861370f8f580d451fec Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 21 Oct 2015 09:34:34 +0100 Subject: [PATCH 37/40] update Changes in preparation for release 0.17.1 --- Changes | 1 + 1 file changed, 1 insertion(+) diff --git a/Changes b/Changes index c4c9c976d..8389a7940 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ CHANGES LOG ----------- +release 0.17.1 - scramble compression - up to 7 for final output cram files - down to 0 for internal bam streams From 83d602c7aac040a77f28dd148c3542719822aab8 Mon Sep 17 00:00:00 2001 From: Jillian Durham Date: Wed, 21 Oct 2015 11:08:57 +0100 Subject: [PATCH 38/40] seqchksum_merge.pl command update --- Changes | 2 +- data/vtlib/merge_aligned.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Changes b/Changes index c4c9c976d..489d1dcd1 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,6 @@ CHANGES LOG ----------- - + - -n 1 flag added to seqchksum_merge.pl command in merge_aligned template to allow different tags in column 1 - scramble compression - up to 7 for final output cram files - down to 0 for internal bam streams 
diff --git a/data/vtlib/merge_aligned.json b/data/vtlib/merge_aligned.json index a33c42808..2f3e21bfa 100644 --- a/data/vtlib/merge_aligned.json +++ b/data/vtlib/merge_aligned.json @@ -97,7 +97,7 @@ "use_STDIN": false, "use_STDOUT": true, "orig_cmd":{"subst":"merge_seqchksum"}, - "cmd":[ "seqchksum_merge.pl", {"subst":"incrams_seqchksum"} ], + "cmd":[ "seqchksum_merge.pl", "-n 1",{"subst":"incrams_seqchksum"} ], "description": "merge individual cram seqchksum (crc32prod) files" }, { From 56cea4ee55f8b9a1ef525360b02bf5dd8109fe9b Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 21 Oct 2015 14:21:06 +0100 Subject: [PATCH 39/40] Update Changes reinsert release 0.17.1 line --- Changes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changes b/Changes index 489d1dcd1..21e8b631b 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,7 @@ CHANGES LOG ----------- + +release 0.17 - -n 1 flag added to seqchksum_merge.pl command in merge_aligned template to allow different tags in column 1 - scramble compression - up to 7 for final output cram files From deac6d83fca587b44b1a81b4d7a92b1635deafee Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 21 Oct 2015 15:03:34 +0100 Subject: [PATCH 40/40] Update Changes --- Changes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changes b/Changes index 21e8b631b..c5ef5e452 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,7 @@ CHANGES LOG ----------- -release 0.17 +release 0.17.1 - -n 1 flag added to seqchksum_merge.pl command in merge_aligned template to allow different tags in column 1 - scramble compression - up to 7 for final output cram files