diff --git a/Changes b/Changes index 3057c43ca..b09ee866a 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,12 @@ CHANGES LOG ----------- +0.27.0 + - check for divide by 0 when calc samtools subsample value + - drop file check, explicitly assume fastq.gz files are gzipped when calc #reads for salmon + - make the code flow more robust in cases of insufficient reads for salmon + - unconditionally remove auxtags before adapter clipping when realignment_switch is 1 + 0.26.0 - add parameters file for top-up merge - functional equivalence: enable selection of markdup method - biobambam (default), samtools or picard diff --git a/data/vtlib/pre_alignment.json b/data/vtlib/pre_alignment.json index 61cfc1405..dd6d970d6 100644 --- a/data/vtlib/pre_alignment.json +++ b/data/vtlib/pre_alignment.json @@ -25,13 +25,12 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":[ - "bamreset", - {"subst":"resetaux_flag","required":"no","ifnull":{"subst_constructor":{"vals":[ "resetaux", {"subst":"resetaux_val", "required":"no", "ifnull":"0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"auxfilter_flag","required":"no"}, - "level=0", - "verbose=0" - ], + "cmd":{"select":"realignment_switch", "select_range":[1], "default":0, "comment":"remove aux tags unconditionally for realignment (realignment is non-default)", + "cases":[ + ["bamreset", "resetaux=0", {"subst":"auxfilter_flag","required":"no", "comment":"auxfilter=comma separated list of aux tags to be kept if resetaux=0"}, "level=0", "verbose=0"], + ["bamreset", "level=0", "verbose=0"] + ] + }, "comment":"Alignment removal also required for bamadapterclip (at least 0.0.142)" }, { diff --git a/data/vtlib/salmon_alignment.json b/data/vtlib/salmon_alignment.json index 968784fa9..61db99e0e 100644 --- a/data/vtlib/salmon_alignment.json +++ b/data/vtlib/salmon_alignment.json @@ -99,16 +99,16 @@ "--geneMap", {"subst":"annotation_val", "required":"yes"}, "--output", {"subst":"salmon_out"}, {"subst":"b2c_mt", 
"ifnull":{"subst_constructor":{ "vals":[ "-p", {"subst":"b2c_mt_val"} ]}}}, "\";", - "case `file $0` in *ASCII*) PART1=`head -n 1000 $0 | wc -l`;; *compressed*) PART1=`gunzip -c $0 | head -n 1000 | wc -l`;; *empty*) PART1=0;; esac;", + "PART1=`gunzip -c $0 | head -n 1000 | wc -l`;", "if [[ $0 && ! $1 ]]; then", "SALMON_CMD+=\"-r $0\";", "if [[ $PART1 -lt 1000 ]]; then", - ">&2 printf \"Not enough reads to run Salmon: fq: %s\" \"$((PART1/4))\"; exit 0; fi;", + ">&2 printf \"Not enough reads to run Salmon: fq: %s\\n\" \"$((PART1/4))\"; echo 'NO_ARCHIVE_SE'; exit 0; fi;", "elif [[ $0 && $1 ]]; then", "SALMON_CMD+=\"-1 $0 -2 $1\";", - "case `file $1` in *ASCII*) PART2=`head -n 1000 $1 | wc -l`;; *compressed*) PART2=`gunzip -c $1 | head -n 1000 | wc -l`;; *empty*) PART2=0;; esac;", + "PART2=`gunzip -c $1 | head -n 1000 | wc -l`;", "if [[ $PART1 -lt 1000 || $PART2 -lt 1000 ]]; then", - ">&2 printf \"Not enough reads to run Salmon: fq1: %s - fq2: %s\" \"$((PART1/4))\" \"$((PART2/4))\"; exit 0; fi; fi;", + ">&2 printf \"Not enough reads to run Salmon: fq1: %s - fq2: %s\\n\" \"$((PART1/4))\" \"$((PART2/4))\"; echo 'NO_ARCHIVE_PE'; exit 0; fi; fi;", "$SALMON_CMD'", { "select":"alignment_reads_layout", @@ -118,38 +118,42 @@ "1":[{"port":"fq1", "direction":"in"}], "2":[{"port":"fq1", "direction":"in"}, {"port":"fq2", "direction":"in"}] } - } + }, + "&& echo 'DO_ARCHIVE';" ], "comment":"salmon is too fussy and requires a minimum of good reads to work or it throws a fit. 
wrapped in a bash script to validate fastq files" }, { "id":"zip_salmon_quant", "type":"EXEC", + "subtype":"STRINGIFY", "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "zip", "-r", {"subst":"zip_target"}, {"subst":"quant"}, {"subst":"quant_genes"}, {"subst":"lib_format_counts"}, {"subst":"libparams"}, {"subst":"cmd_info"} ] + "cmd":[ + "bash -c 'ms=`cat`; if [ \"${ms}\" == \"DO_ARCHIVE\" ]; then >&2 printf \"Found archive instruction, so zipping\\n\"; zip -r $0 $1 $2 $3 $4 $5; else >&2 printf \"No such file, so no zip: %s\\n\" $1; exit 0; fi'", {"subst":"zip_target"}, {"subst":"quant"}, {"subst":"quant_genes"}, {"subst":"lib_format_counts"}, {"subst":"libparams"}, {"subst":"cmd_info"} + ] }, { - "id":"quant_genes", + "id":"archive_decision", "type":"RAFILE", - "subtype":"DUMMY", - "name":{"subst":"quant_genes"} + "stubtype":"DUMMY", + "name":{"subst":"archive_decision", "ifnull":"archive_decision.txt"} }, { "id":"cp_quant_genes", "type":"EXEC", "subtype":"STRINGIFY", - "use_STDIN": false, + "use_STDIN": true, "use_STDOUT": false, - "cmd":[ "bash -c 'if [ -e $0 ]; then cp $0 $1; else >&2 printf \"No such file: %s\" $0; exit 0; fi'", - {"port":"src_quant_genes", "direction":"in"}, {"subst":"quant_genes_target"} + "cmd":[ "bash -c 'ms=`cat`; if [[ \"${ms}\" == \"DO_ARCHIVE\" ]] && [[ -e $0 ]]; then >&2 cp -v $0 $1; else >&2 printf \"No quant_genes file: %s\\n\" $0; exit 0; fi'", + {"subst":"quant_genes"}, {"subst":"quant_genes_target"} ], - "comment":"if salmon is not run this file is not created" + "comment":"if salmon is not run the quant_genes file is not created" } ], "edges":[ - { "id":"zip_salmon_output", "from":"quant_genes", "to":"zip_salmon_quant"}, - { "id":"salmon_to_quant_genes", "from":"salmon", "to":"quant_genes" }, - { "id":"cp_quant_genes", "from":"quant_genes", "to":"cp_quant_genes:src_quant_genes" } + { "id":"salmon_to_quant_genes", "from":"salmon", "to":"archive_decision" }, + { "id":"zip_salmon_output", "from":"archive_decision", 
"to":"zip_salmon_quant"}, + { "id":"cp_quant_genes", "from":"archive_decision", "to":"cp_quant_genes" } ] } diff --git a/data/vtlib/subsample.json b/data/vtlib/subsample.json index a537d1f88..271cf6ca2 100644 --- a/data/vtlib/subsample.json +++ b/data/vtlib/subsample.json @@ -42,7 +42,7 @@ "use_STDOUT": true, "cmd":[ "bash -c '", - {"subst_constructor":{"vals":["tmfs=\"", {"subst":"tag_metrics_files", "required":true}, "\""],"postproc":{"op":"concat","pad":""}}}, "; if [ ! -z \"${tmfs}\" ]; then for tag_metrics_file in ${tmfs}; do reads_count=`jq", {"subst":"jqkey", "ifnull":{"subst_constructor":{"vals":["'\"'\"'.reads_count.\"", {"subst":"s2_tag_index", "required":true}, "\"'\"'\"'"],"postproc":{"op":"concat","pad":""}}}}, "${tag_metrics_file}`; reads_count=`echo ${reads_count} | tr -cd [:digit:]`; reads_count_total=$((${reads_count_total}+${reads_count})); done; frac=`echo \"10000/${reads_count_total}\" | bc -l`; fi;", + {"subst_constructor":{"vals":["tmfs=\"", {"subst":"tag_metrics_files", "required":true}, "\""],"postproc":{"op":"concat","pad":""}}}, "; if [ ! -z \"${tmfs}\" ]; then for tag_metrics_file in ${tmfs}; do reads_count=`jq", {"subst":"jqkey", "ifnull":{"subst_constructor":{"vals":["'\"'\"'.reads_count.\"", {"subst":"s2_tag_index", "required":true}, "\"'\"'\"'"],"postproc":{"op":"concat","pad":""}}}}, "${tag_metrics_file}`; reads_count=`echo ${reads_count} | tr -cd [:digit:]`; reads_count_total=$((${reads_count_total}+${reads_count})); done; if [[ $reads_count_total -eq 0 ]]; then reads_count_total=1; fi; frac=`echo \"10000/${reads_count_total}\" | bc -l`; fi;", "if [ ! -z $frac ]; then", "samtools", "view",