From fd22914189db2694cc7355a694044e5cb711b72b Mon Sep 17 00:00:00 2001 From: Frederick Dodd Date: Fri, 30 Apr 2021 15:47:36 +0100 Subject: [PATCH 1/3] Changing to Github Actions --- .../workflows/testing_and_building_repo.yml | 69 +++++++++++++++++++ .travis.yml | 27 -------- MANIFEST | 58 ++++++++++++++-- scripts/install_npg_perl_dependencies.sh | 51 ++++++++++++++ 4 files changed, 174 insertions(+), 31 deletions(-) create mode 100644 .github/workflows/testing_and_building_repo.yml delete mode 100644 .travis.yml create mode 100755 scripts/install_npg_perl_dependencies.sh diff --git a/.github/workflows/testing_and_building_repo.yml b/.github/workflows/testing_and_building_repo.yml new file mode 100644 index 000000000..1d2d7ceb6 --- /dev/null +++ b/.github/workflows/testing_and_building_repo.yml @@ -0,0 +1,69 @@ +name: testing_and_building_repo +on: [push, pull_request] +jobs: + build: + strategy: + matrix: + os: ['ubuntu-18.04'] + + runs-on: ${{matrix.os}} + name: Distribution Perl on ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + + # Caching cpanm external modules + - name: Cache cpanm external modules + id: cpanmCache + uses: actions/cache@v2 + with: + path: ~/perl5ext + key: ${{ matrix.os}}-build-cpanm-external + + - name: install cpanm + run: | + wget -qO - https://cpanmin.us | /usr/bin/perl - --sudo App::cpanminus + + # run the perl install script + - name: install NPG Perl dependencies, and their CPAN dependencies + run: | + cpanm --local-lib=~/perl5ext local::lib && eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib) + ${GITHUB_WORKSPACE}/scripts/install_npg_perl_dependencies.sh $WTSI_NPG_GITHUB_URL $WTSI_NPG_BUILD_BRANCH + env: + WTSI_NPG_GITHUB_URL: https://github.com/wtsi-npg + WTSI_NPG_BUILD_BRANCH: ${{ github.head_ref }} # expression syntax is required; env values are not shell-expanded + + - name: install cpanm dependencies + run: | + eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5npg) + eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5ext) + cpanm --installdeps --notest . 
+ + - name: run Build.PL and ./Build + run: | + eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5ext) + eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5npg) + export TEST_AUTHOR=1 + perl Build.PL + ./Build test --verbose + ./Build install + + # running ./Build dist and exporting files + - name: run ./Build dist + run: | + eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5ext) + export TEST_AUTHOR=1 + ./Build dist + export DIST_FILE=$(ls p4-*.tar.gz) + export MD5_FILE=$DIST_FILE.md5 + md5sum $DIST_FILE > $MD5_FILE + export SHA256_FILE=$DIST_FILE.sha256 + shasum -a 256 $DIST_FILE > $SHA256_FILE + + # Archive logs if failure + - name: Archive CPAN logs + if: ${{ failure() }} + uses: actions/upload-artifact@v2 + with: + name: cpan_log + path: /home/runner/.cpanm/work/*/build.log + retention-days: 5 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1a6a76f54..000000000 --- a/.travis.yml +++ /dev/null @@ -1,27 +0,0 @@ -sudo: false -language: perl -dist: - - bionic -perl: -- "5.26-shrplib" - -env: - global: - - secure: "Yaiv+tTD2FnvN2UwA9yEVIzLQMXR7yhLkuzJ9521MNM354SqKJUFtMLbQfdA7Rld5npylBSl1xfUOwjiraDBvtbjnC6PKpaz2fyR8/ycGptMjbWncxjNiSIxYelmw/JD9qu74Z2yIKzty7O4WqJ1IbP3OrQd/aTYViekibl7hIo=" - -after_success: - - ./Build dist - - export DIST_FILE=$(ls p4-*.tar.gz) - - export MD5_FILE=$DIST_FILE.md5 - - md5sum $DIST_FILE > $MD5_FILE - - export SHA256_FILE=$DIST_FILE.sha256 - - shasum -a 256 $DIST_FILE > $SHA256_FILE - -deploy: - provider: releases - api-key: $GH_OAUTH - file: - - $DIST_FILE - - $MD5_FILE - - $SHA256_FILE - skip_cleanup: true diff --git a/MANIFEST b/MANIFEST index 5f35cf90f..46f7fdaad 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,3 +1,4 @@ +.github/workflows/testing_and_building_repo.yml bin/viv.pl bin/vtfp.pl Build.PL @@ -7,36 +8,59 @@ data/bwa_post_proc.vtf data/hiseqx.vtf data/post_alignment.vtf data/vtlib/alignment_common.json +data/vtlib/alignment_wtsi_stage2_humansplit_extrasplit_notargetalign_template.json 
data/vtlib/alignment_wtsi_stage2_humansplit_notargetalign_template.json data/vtlib/alignment_wtsi_stage2_humansplit_template.json data/vtlib/alignment_wtsi_stage2_template.json data/vtlib/auxmerge_prep.json data/vtlib/auxmerge_prep_realign.json +data/vtlib/bamindexdecoder.json data/vtlib/basic_params_top_up_merge.json +data/vtlib/bcftools_genotype_call.json data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json data/vtlib/bwa_aln_alignment.json data/vtlib/bwa_aln_se_alignment.json data/vtlib/bwa_mem_alignment.json +data/vtlib/bwa_mem_bwakit_alignment.json data/vtlib/final_output_noalign_prep.json data/vtlib/final_output_prep.json +data/vtlib/final_output_prep_chrsplit_noaln.json +data/vtlib/final_stage1_process.json +data/vtlib/final_stage1_process_nosplit.json +data/vtlib/hisat2_alignment.json +data/vtlib/markdup_biobambam.json +data/vtlib/markdup_duplexseq.json +data/vtlib/markdup_none.json +data/vtlib/markdup_picard.json +data/vtlib/markdup_samtools.json data/vtlib/merge_aligned.json data/vtlib/merge_final_output_prep.json +data/vtlib/minimap2_alignment.json +data/vtlib/passthrough.json data/vtlib/post_alignment.json +data/vtlib/post_alignment_realignment.json data/vtlib/pre_alignment.json data/vtlib/pre_alignment_realign.json data/vtlib/README.vtlib +data/vtlib/realignment_wtsi_humansplit_notargetalign_template.json +data/vtlib/realignment_wtsi_humansplit_template.json data/vtlib/realignment_wtsi_template.json data/vtlib/salmon_alignment.json data/vtlib/seqchksum.json +data/vtlib/seqchksum_ehs.json data/vtlib/seqchksum_hs.json data/vtlib/seqchksum_realign.json +data/vtlib/seqchksum_stage1.json data/vtlib/split_by_chromosome.json data/vtlib/star_alignment.json +data/vtlib/subsample.json +data/vtlib/target_alignment.json +data/vtlib/target_nchs_alignment.json data/vtlib/tophat2_alignment.json -data/vtlib/markdup_biobambam.json -data/vtlib/markdup_duplexseq.json -data/vtlib/markdup_picard.json -data/vtlib/markdup_samtools.json +data/vtlib/unaln_intfile.json 
+data/vtlib/unaln_intfile4.json +data/vtlib/unaln_tee3.json +data/vtlib/unaln_tee4.json examples/bwa_aln_cfg.png examples/bwa_mem/bwa_mem_alignment.vtf examples/bwa_mem/generic_alignment_with_phix.vtf @@ -53,14 +77,40 @@ examples/tophat2_cfg.png MANIFEST This list of files README README.vtfp +scripts/install_npg_perl_dependencies.sh t/00-scripts_compile.t +t/10-viv-noop_edge.t +t/10-viv-packflag.t +t/10-viv-stringify.t +t/10-viv-teenode.t t/10-vtfp-array_expansion.t +t/10-vtfp-noop.t t/10-vtfp-param_ring.t t/10-vtfp-pv.t +t/10-vtfp-select_directive.t +t/10-vtfp-select_directive.v2.t +t/10-vtfp-splice_nodes.t t/10-vtfp-subst_directive.t +t/10-vtfp-subst_directive.v2.t +t/10-vtfp-vtfile.t +t/10-vtfp-vtfile_v2.t +t/50-viv.t t/data/10-vtfp-array_expansion.json +t/data/10-vtfp-array_expansion.v2.json +t/data/10-vtfp-noop.json t/data/10-vtfp-param_ring.json t/data/10-vtfp-pv.json +t/data/10-vtfp-splice_nodes_00.json +t/data/50-viv_failing_io_pipeline0.v2.vtf +t/data/50-viv_failing_io_pipeline0.vtf +t/data/50-viv_failing_io_pipeline1.v2.vtf +t/data/50-viv_failing_io_pipeline1.vtf +t/data/50-viv_failing_io_pipeline2.v2.vtf +t/data/50-viv_failing_io_pipeline2.vtf +t/data/50-viv_failing_pipeline.v2.vtf +t/data/50-viv_failing_pipeline.vtf +t/data/50-viv_pipeline.v2.vtf +t/data/50-viv_pipeline.vtf visualisation/bwa_aln.cfg visualisation/bwa_aln.json visualisation/cgi-bin/getProgress diff --git a/scripts/install_npg_perl_dependencies.sh b/scripts/install_npg_perl_dependencies.sh new file mode 100755 index 000000000..617f09099 --- /dev/null +++ b/scripts/install_npg_perl_dependencies.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +set -e -x + +WTSI_NPG_GITHUB_URL=$1 +WTSI_NPG_BUILD_BRANCH=$2 + +eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5ext) +cpanm --quiet --notest Module::Build + +# WTSI NPG Perl repo dependencies +repos="" +for repo in perl-dnap-utilities; +do + cd /tmp + # Always clone master when using depth 1 to get current tag + git clone --branch master --depth 1 
"${WTSI_NPG_GITHUB_URL}/${repo}.git" "${repo}.git" + cd "/tmp/${repo}.git" + # Shift off master to the requested branch, but only when one was given and it exists on the remote + [ -n "${WTSI_NPG_BUILD_BRANCH}" ] && git ls-remote --heads --exit-code origin "${WTSI_NPG_BUILD_BRANCH}" && git pull origin "${WTSI_NPG_BUILD_BRANCH}" && echo "Switched to branch ${WTSI_NPG_BUILD_BRANCH}" + repos=$repos" /tmp/${repo}.git" +done + +# Install CPAN dependencies. The src libs are on PERL5LIB because of +# circular dependencies. The blibs are on PERL5LIB because the package +# version, which cpanm requires, is inserted at build time. They must +# be before the libs for cpanm to pick them up in preference. + +for repo in $repos +do + export PERL5LIB=$repo/blib/lib:$PERL5LIB:$repo/lib +done + +for repo in $repos +do + cd "$repo" + cpanm --quiet --notest --installdeps . + perl Build.PL + ./Build +done + +# Finally, bring any common dependencies up to the latest version and +# install +eval $(perl -I ~/perl5ext/lib/perl5/ -Mlocal::lib=~/perl5npg) +for repo in $repos +do + cd "$repo" + cpanm --quiet --notest --installdeps . 
+ ./Build install +done +cd From b3b19f1d7a08f250e14312b9317ea0e5f17971c2 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 4 May 2021 18:23:23 +0100 Subject: [PATCH 2/3] add bwa mem flags used for Hi-C library types (-5 -S -P -B) --- data/vtlib/bwa_mem_alignment.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/data/vtlib/bwa_mem_alignment.json b/data/vtlib/bwa_mem_alignment.json index af1f416bd..781727ea8 100644 --- a/data/vtlib/bwa_mem_alignment.json +++ b/data/vtlib/bwa_mem_alignment.json @@ -33,7 +33,14 @@ "subst_constructor":{ "vals":[ "-K", {"subst":"bwa_mem_K_value"} ] } }, {"id":"bwa_mem_p_flag","required":"no","default":"-p","comment":"by default, paired alignment is assumed"}, - {"id":"bwa_mem_Y_flag","required":"no","default":"-Y","comment":"by default, supplementary alignment sequences will be soft clipped instead of hard clipped"} + {"id":"bwa_mem_Y_flag","required":"no","default":"-Y","comment":"by default, supplementary alignment sequences will be soft clipped instead of hard clipped"}, + {"id":"bwa_mem_B_value","required":"no","comment":"if unspecified, -B flag is not used"}, + { + "id":"bwa_mem_B_flag", + "comment":"penalty for a mismatch [default 4]", + "required":"no", + "subst_constructor":{ "vals":[ "-B", {"subst":"bwa_mem_B_value"} ] } + } ], "nodes":[ { @@ -55,6 +62,11 @@ {"subst":"bwa_mem_p_flag"},{"subst":"bwa_mem_Y_flag"}, {"subst":"bwa_mem_T_flag"}, {"subst":"bwa_mem_K_flag"}, + {"select":"bwa_mem_5_flag", "required":true, "select_range":[1], "default":"off", "cases":{"on":"-5","off":[]},"comment":"for split alignment, take the alignment with the smallest coordinate as primary"}, + {"select":"bwa_mem_S_flag", "required":true, "select_range":[1], "default":"off", "cases":{"on":"-S","off":[]},"comment":"skip mate rescue"}, + {"select":"bwa_mem_P_flag", "required":true, "select_range":[1], "default":"off", "cases":{"on":"-P","off":[]},"comment":"skip pairing; mate rescue performed unless -S also in 
use"}, + {"subst":"bwa_mem_B_flag"}, + {"subst":"bwa_mem_arbitrary_flags"}, {"port":"db_prefix_reference_genome", "direction":"in"}, {"port":"fq","direction":"in"} ] From 2a8c8259616125c5e9a8ddf23bf3b45eca4fb49e Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Sat, 8 May 2021 00:58:20 +0100 Subject: [PATCH 3/3] change bam file, bam index and bam md5 from bamrecompress (biobambam2) to samtools and md5sum --- data/vtlib/alignment_common.json | 1 + data/vtlib/final_output_prep.json | 96 +++++++++++++++++++------------ 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/data/vtlib/alignment_common.json b/data/vtlib/alignment_common.json index 913a48998..d31d3218c 100644 --- a/data/vtlib/alignment_common.json +++ b/data/vtlib/alignment_common.json @@ -15,6 +15,7 @@ {"id":"bwa_executable","required":"no","default":"bwa0_6"}, {"id":"aligner_numthreads","required":"no","default":"2"}, {"id":"bam_ext","required":"no","default":".bam"}, + {"id":"bam_idx_ext","required":"no","default":".bai"}, {"id":"cram_ext","required":"no","default":".cram"}, {"id":"cram_idx_ext","required":"no","default":".cram.crai"}, {"id":"alignment_method","required":"yes"}, diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index a65aeca60..d524762dc 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -104,45 +104,21 @@ "postproc":{"op":"concat","pad":"="} } }, + {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, + {"id":"fopid","required":"yes"}, { - "id":"br_indexfile_flag", - "required":"no", - "subst_constructor":{ - "vals":[ "indexfilename=", {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bai" ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_md5file_flag", - "required":"no", - "subst_constructor":{ - "vals":[ "md5filename=", {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bam.md5" ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_numthreads_flag", - 
"required":"no", - "subst_constructor":{ - "vals":[ "numthreads=", {"subst":"br_numthreads_val"} ], - "postproc":{"op":"concat", "pad":""} - } - }, - { - "id":"br_tmpfile_flag", - "required":"no", + "id":"bam_file", + "required":"yes", "subst_constructor":{ - "vals":[ "tmpfile=", {"subst":"outdatadir"}, "/", {"subst":"brtmp"}, "_", {"subst":"fopid"}, ".tmp" ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_ext"} ], "postproc":{"op":"concat", "pad":""} } }, - {"id":"flagstats_filter_flag","required":"no","default":"0x900"}, - {"id":"fopid","required":"yes"}, { - "id":"bam_file", + "id":"bam_index_file", "required":"yes", "subst_constructor":{ - "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_ext"} ], + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, {"subst":"bam_idx_ext"} ], "postproc":{"op":"concat", "pad":""} } }, @@ -160,6 +136,13 @@ "postproc":{"op":"concat", "pad":""} } }, + { + "id":"bam_md5", + "subst_constructor":{ + "vals":[ {"subst":"outdatadir"}, "/", {"subst":"fopid"}, ".bam.md5" ], + "postproc":{"op":"concat", "pad":""} + } + }, { "id":"cram_md5", "subst_constructor":{ @@ -293,6 +276,7 @@ {"port":"flagstat", "direction":"out"}, {"port":"calibration_pu", "direction":"out"}, {"port":"bam", "direction":"out"}, + {"port":"md5", "direction":"out"}, {"port":"samtools_stats_F0x900", "direction":"out"}, {"port":"samtools_stats_F0xB00", "direction":"out"}, {"port":"bam_stats", "direction":"out"}, @@ -342,6 +326,13 @@ ], "comment":"allow a generous 500 minutes for the teepot timeout; specify parameter value teepot_tempdir_value to specify teepot tempdir" }, + { + "id":"create_bam_md5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"md5sum" + }, { "id":"scramble_md5", "type":"EXEC", @@ -357,6 +348,14 @@ "cmd":[ "tr", "-d", " \\-\n" ], "comment":"the double-backslash is required to get the correct character set to the tr command" }, + { + "id":"postprocess_bam_md5", + "type":"EXEC", + 
"use_STDIN": true, + "use_STDOUT": true, + "cmd":[ "tr", "-d", " \\-\n" ], + "comment":"the double-backslash is required to get the correct character set to the tr command" + }, { "id":"cram_index", "type":"EXEC", @@ -436,14 +435,34 @@ "cmd": [ {"subst":"calibration_pu_executable"}, "-p", {"subst":"calibration_pu_prefix"}, "-filter-bad-tiles", {"subst":"calibration_pu_bad_tiles_count"}, "-" ] }, { - "id":"bamrecompress", + "id":"diskbam", + "comment":"convert compression level 0 bam stream to bam file", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd": [ "bamrecompress", "verbose=0", "index=1", {"subst":"br_indexfile_flag"}, "md5=1", {"subst":"br_md5file_flag"}, {"subst":"br_numthreads_flag"}, {"subst":"br_tmpfile_flag"} ] + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "view", "-b", + "--threads", {"subst":"diskbam_threads","required":true,"ifnull":4}, + {"subst":"diskbam_extra_flags", "required":false}, + "-" + ] + }, + { + "id":"bam_index", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": false, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "index", + {"subst":"diskbam_extra_flags", "required":false}, + {"port":"bam", "direction":"in"}, + {"port":"idx", "direction":"out"} + ] }, - { "id":"bam_file", "type":"OUTFILE", "name":{"subst":"bam_file"} }, + { "id":"bam_file", "type":"RAFILE", "name":{"subst":"bam_file"} }, + { "id":"bam_index_file", "type":"OUTFILE", "name":{"subst":"bam_index_file"} }, { "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} }, + { "id":"bam_md5", "type":"OUTFILE", "name":{"subst":"bam_md5"} }, { "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} }, { "id":"seqchksum_file", "type":"RAFILE", "name":{"subst":"seqchksum_file"} }, { "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" }, @@ -502,8 +521,13 @@ 
{ "id":"bmdmw_to_seqchksum_extrahash", "from":"bmd_multiway:seqchksum_extrahash", "to":"seqchksum_extrahash" }, { "id":"bmdmw_to_flagstat", "from":"bmd_multiway:flagstat", "to":"flagstat_filter" }, { "id":"flagstat_filter_to_flagstat", "from":"flagstat_filter", "to":"flagstat" }, - { "id":"bmd_to_bam", "from":"bmd_multiway:bam", "to":"bamrecompress" }, - { "id":"brc_to_bam", "from":"bamrecompress", "to":"bam_file" }, + { "id":"bmd_to_bam", "from":"bmd_multiway:bam", "to":"diskbam" }, + { "id":"stv_to_bam", "from":"diskbam", "to":"bam_file" }, + { "id":"bam_to_idx", "from":"bam_file", "to":"bam_index:bam" }, + { "id":"idx_to_file", "from":"bam_index:idx", "to":"bam_index_file" }, + { "id":"bmw_to_md5", "from":"bmd_multiway:md5", "to":"create_bam_md5" }, + { "id":"md5_to_bam_postprocess", "from":"create_bam_md5", "to":"postprocess_bam_md5" }, + { "id":"corrected_bam_md5_out", "from":"postprocess_bam_md5", "to":"bam_md5" }, { "id":"tee_to_cram", "from":"scramble_tee:cram", "to":"cram_file" }, { "id":"corrected_md5_out", "from":"postprocess_md5", "to":"cram_md5" }, { "id":"scs_to_tee", "from":"seqchksum", "to":"seqchksum_file" },