From fb4d751eab93d0a40ad22df2b537a50744f30008 Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 18 May 2022 14:54:42 +1000 Subject: [PATCH] v0.3.5 - Added new trimming method (`bbduk`) for tiling experiment - Added new adapters for tiling - Removed R dependency `stringr` and `stringi` #7 --- bin/adapters/{FLU_f.fa => FLU_left.fa} | 0 bin/adapters/{FLU_r.fa => FLU_right.fa} | 0 bin/adapters/FLU_tile_left_h1.fa | 98 ++++++++++++ bin/adapters/FLU_tile_left_h3.fa | 130 +++++++++++++++ bin/adapters/FLU_tile_right_h1.fa | 98 ++++++++++++ bin/adapters/FLU_tile_right_h3.fa | 140 +++++++++++++++++ bin/adapters/NexteraPE-PE.fa | 12 -- bin/adapters/{RSV_f.fa => RSV_left.fa} | 0 bin/adapters/{RSV_r.fa => RSV_right.fa} | 0 bin/adapters/RSV_tile_left.fa | 20 +++ bin/adapters/RSV_tile_right.fa | 6 + bin/adapters/TruSeq2-PE.fa | 16 -- bin/adapters/TruSeq2-SE.fa | 6 - bin/adapters/TruSeq3-PE-2.fa | 12 -- bin/adapters/TruSeq3-PE.fa | 4 - bin/adapters/TruSeq3-SE.fa | 4 - bin/adapters/flu/FLU_G.fa | 12 -- bin/adapters/flu/FLU_a.fa | 14 -- bin/adapters/flu/FLU_f.fa | 8 - bin/adapters/flu/FLU_r.fa | 8 - bin/adapters/rsv/RSV_G.fa | 12 -- bin/adapters/rsv/RSV_a.fa | 14 -- bin/adapters/rsv/RSV_f.fa | 18 --- bin/adapters/rsv/RSV_f.fa.old | 14 -- bin/adapters/rsv/RSV_r.fa | 20 --- bin/adapters/rsv/RSV_r.fa.old | 14 -- snakefile | 201 ++++++++++++++++-------- tools/auto_install.sh | 6 +- tools/summaryReport.R | 4 - wfi_config.yaml | 11 ++ 30 files changed, 638 insertions(+), 264 deletions(-) rename bin/adapters/{FLU_f.fa => FLU_left.fa} (100%) rename bin/adapters/{FLU_r.fa => FLU_right.fa} (100%) create mode 100644 bin/adapters/FLU_tile_left_h1.fa create mode 100755 bin/adapters/FLU_tile_left_h3.fa create mode 100644 bin/adapters/FLU_tile_right_h1.fa create mode 100755 bin/adapters/FLU_tile_right_h3.fa delete mode 100644 bin/adapters/NexteraPE-PE.fa rename bin/adapters/{RSV_f.fa => RSV_left.fa} (100%) rename bin/adapters/{RSV_r.fa => RSV_right.fa} (100%) create mode 100755 bin/adapters/RSV_tile_left.fa create mode 100755 bin/adapters/RSV_tile_right.fa delete mode 100644 bin/adapters/TruSeq2-PE.fa delete mode 100644 bin/adapters/TruSeq2-SE.fa delete mode 100644 bin/adapters/TruSeq3-PE-2.fa delete mode 100644 bin/adapters/TruSeq3-PE.fa delete mode 100644 bin/adapters/TruSeq3-SE.fa delete mode 100644 bin/adapters/flu/FLU_G.fa delete mode 100644 bin/adapters/flu/FLU_a.fa delete mode 100644 bin/adapters/flu/FLU_f.fa delete mode 100644 bin/adapters/flu/FLU_r.fa delete mode 100644 bin/adapters/rsv/RSV_G.fa delete mode 100644 bin/adapters/rsv/RSV_a.fa delete mode 100644 bin/adapters/rsv/RSV_f.fa delete mode 100644 bin/adapters/rsv/RSV_f.fa.old delete mode 100644 bin/adapters/rsv/RSV_r.fa delete mode 100644 bin/adapters/rsv/RSV_r.fa.old diff --git a/bin/adapters/FLU_f.fa b/bin/adapters/FLU_left.fa similarity index 100% rename from bin/adapters/FLU_f.fa rename to bin/adapters/FLU_left.fa diff --git a/bin/adapters/FLU_r.fa b/bin/adapters/FLU_right.fa similarity index 100% rename from bin/adapters/FLU_r.fa rename to bin/adapters/FLU_right.fa diff --git a/bin/adapters/FLU_tile_left_h1.fa b/bin/adapters/FLU_tile_left_h1.fa new file mode 100644 index 0000000..ce8adfb --- /dev/null +++ b/bin/adapters/FLU_tile_left_h1.fa @@ -0,0 +1,98 @@ +>H1pdmHA_0_LEFT +GAGCAAAAGCAGGGGAAAACA +>H1pdmHA_1_LEFT_me +TCTGCTGTATACATTTACAACCGC +>H1pdmHA_1_LEFT_me_alt1 +TCTGCTGTATACATTCACAACCGC +>H1pdmHA_1_LEFT_me_alt2 +TCTACTGTGTACATTTACAACCGC +>H1pdmHA_2_LEFT_new +GGTAGCCCCATTGCATTTGG +>H1pdmHA_3_LEFT +ACAATGGAACGTGTTACCCAGG +>H1pdmHA_4_LEFT +GTGTAACGGCAGCATGTCCT +>H1pdmHA_4_LEFT_alt1 +GTGTAACGGCAGCATGTTCT +>H1pdmHA_4_LEFT_alt2 +GTGTAACAGCAGCATGTCCT +>H1pdmHA_5_LEFT +AAAGGGAAAGAAGTCCTCGTGC +>H1pdmHA_6_LEFT +GGGATCAAGAAGGGAGAATGAACT +>H1pdmHA_7_LEFT +TGCAATACAACTTGTCAGACACC +>H1pdmHA_7_LEFT_alt1 +TGCAATACAACTTGTCAGACCCC +>H1pdmHA_8_LEFT +CAATCTAGAGGCCTATTCGGGG +>H1pdmHA_8_LEFT_alt1 +TCAATCTAGAGGCCTATTTGGGG +>H1pdmHA_9_LEFT +AGAGCACACAAAATGCCATTGA +>H1pdmHA_9_LEFT_alt1 +AGAGCACACAGAATGCCATTGA +>H1pdmHA_10_LEFT +CCTGGACATTTGGACTTACAATGC +>H1pdmHA_11_LEFT +TGCTTTGAATTTTACCACAAATGCG +>H1pdmHA_last_LEFT +ACCCAAAATACTCAGAGGAAGCA +>H1pdmNA_0_LEFT +CAGCAAAAGCAGGAGTTTAAAATGAA +>H1pdmNA_0_LEFT_alt1 +AGCAAAAGCAGGAGTTTTAAAATGAA +>H1pdmNA_2_LEFT +TGAAACATGCAATCAAAGCGTCAT +>H1pdmNA_2_LEFT_alt1 +TTGAAACATGCAATAAAAGCGTCAT +>H1pdmNA_2_LEFT_alt2 +CAGATTGAAACATGCAATAAAAACGTCAT +>H1pdmNA_3_LEFT +AGTGGATGGGCTATATACACTAAAGAC +>H1pdmNA_3_LEFT_alt1 +AGTGGATGGGCTATATACAGTAAAGAC +>H1pdmNA_4_LEFT_me +TCGAACCCTAATGAGCTGTCC +>H1pdmNA_4_LEFT_me_alt1 +CGAACCCTAATGAGCTGCCC +>H1pdmNA_5_LEFT +GGCATAATAACAGACACTATCAAGAGTT +>H1pdmNA_6_LEFT +CATACAAGATCTTCAGAATAGAAAAGGGAA +>H1pdmNA_6_LEFT_alt1 +TCATACAAAATCTTCAGAATAGAAAAGGGAA +>H1pdmNA_6_LEFT_alt2 +CCTCATACAAAATCTTCAGAATAGAAAAAGGAA +>H1pdmNA_7_LEFT +AATCGACCGTGGGTGTCTTT +>H1pdmNA_8_LEFT_new2 +ATACGGCAATGGTGTTTGGATAG +>H1pdmNA_last_LEFT_new +GTATAAGACCTTGCTTCTGGGTTG +>A-M-UniF +GGGGGGAGCAAAAGCAGGTAG +>H1pdmMP_1_LEFT +GAGGCTCTCATGGAATGGCTA +>H1pdmMP_2_LEFT_me +TGACTAAGGGGATTTTAGGGTTTGT +>H1pdmMP_2_LEFT_me_alt1 +CTCTGACTAAGGGAATTTTAGGATTTGT +>H1pdmMP_2_LEFT_me_alt2 +TCTGACTAAGGGGATTTTAGGATTTGT +>H1pdmMP_3_LEFT_new_alt1 +CCAAGGAGGTGTCACTAAGCT +>H1pdmMP_3_LEFT_new_alt2 +GCCAAAGAAGTGTCACTAAGCT +>H1pdmMP_4_LEFT_me +CTGATTCACAGCATCGGTCTCA +>H1pdmMP_5_LEFT +CTGGATCAAGTGAACAGGCAG +>H1pdmMP_5_LEFT_alt1 +CTGGATCGAGTGAACAGGCAG +>H1pdmMP_6_LEFT +AAGCGAATGGGAGTGCAGATG +>H1pdmMP_6_LEFT_alt1 +GCGGATGGGAGTGCAGATG +>H1pdmMP_last_LEFT +TCATTGGGATCTTGCACCTGA diff --git a/bin/adapters/FLU_tile_left_h3.fa b/bin/adapters/FLU_tile_left_h3.fa new file mode 100755 index 0000000..c0c3ba9 --- /dev/null +++ b/bin/adapters/FLU_tile_left_h3.fa @@ -0,0 +1,130 @@ +>H3N2HA_0_LEFT +CAGCAAAAGCAGGGGATAATTCT +>H3N2HA_1_LEFT +CACCATGCAGTACCAAACGGAA +>H3N2HA_2_LEFT_me +TGCGACAGTCCTCATCAGATC +>H3N2HA_2_LEFT_me_alt1 +AATATGCAACAGTCCTCATCAGATC +>H3N2HA_3_LEFT +ATGATGTGCCGGATTATGCCTC +>H3N2HA_3_LEFT_alt1 +ATGATGTGCCGGATTATGCATC +>H3N2HA_4_LEFT +GGAGTCACTCAAAACGGAACAAG +>H3N2HA_4_LEFT_alt1 +TGGAGTCACTCAAAACGGAAAAAG +>H3N2HA_4_LEFT_alt2 +TGGAGTCAAACAAAACGGAACAAG +>H3N2HA_4.5_LEFT_new2_alt1 +GTGACTATGCCAAACAAGGAACA +>H3N2HA_4.5_LEFT_new2_alt2 +CGTGACTATGCCAAACAATGAACA +>H3N2HA_5_LEFT +TCAGGAAGAATCACAGTATCTACCAA +>H3N2HA_5_LEFT_alt1 +TCAGGAAGAATCACAGTCTCTACCAA +>H3N2HA_5_LEFT_alt2 +TCAGGAAGAATCACAGTATCTACCAG +>H3N2HA_6_LEFT +GGAATCTGATTGCTCCTAGGGG +>H3N2HA_6_LEFT_alt1 +GGGAATCTAATTGCTCCTAGGGG +>H3N2HA_7_LEFT +AACAGGATCACATACGGGGC +>H3N2HA_7_LEFT_alt1 +GTAAACAGGATCACATATGGGGC +>H3N2HA_8_LEFT +GTTGGTACGGTTTCAGGCATCA +>H3N2HA_8_LEFT_alt1 +GGTACGGCTTCAGGCATCA +>H3N2HA_8_LEFT_alt2 +GTTGGTACGGCTTCAGACATCA +>H3N2HA_9_LEFT_new2 +CCAACGAGAAATTCCATCAGATTGA +>H3N2HA_10_LEFT +TTGATCTAACTGACTCAGAAATGAACAA +>H3N2HA_10_LEFT_alt1 +TTGACCTAACTGACTCAGAAATGAACAA +>H3N2HA_last_LEFT_new3_alt1 +CAACCGGTTCCAGATCAAGGG +>H3N2HA_last_LEFT_new3_alt2 +CAACCGGTTCCAGATCAAAGG +>H3N2NA_0_LEFT +AGCAAAAGCAGGAGTAAAGATGAA +>H3N2NA_2_LEFT_new1_alt1 +CCCCCAAACAACCAAGTGATG +>H3N2NA_2_LEFT_new1_alt2 +CCCCCCAAATAACCAAGTGATG +>H3N2NA_2_LEFT_new1_alt3 +CCCCCGAATAACCAAGTGATG +>H3N2NA_3_LEFT +TGTGACATTACAGGATTTGCACC +>H3N2NA_3_LEFT_alt1 +GTGGCATTACAGGATTTGCACC +>H3N2NA_4_LEFT_new_alt1 +CGTGCATTCAAATAACACAGTACGT +>H3N2NA_4_LEFT_new_alt2 +TGCATTCAAATAACACAGCACGT +>H3N2NA_5_LEFT +CACGATGGAAAAGCATGGCTG +>H3N2NA_6_LEFT +CTCAGGACCCAGGAGTCAGAAT +>H3N2NA_6_LEFT_alt1 +CTCAGGACCCAGGAATCAGAAT +>H3N2NA_6_LEFT_alt2 +CTCAGAACCCAGGAGTCAGAAT +>H3N2NA_7_LEFT_me +GAGTGCTCTTGCTATCCTCGA +>H3N2NA_7_LEFT_me_alt1 +GAGTGCTCCTGCTATCCTCGA +>H3N2NA_8_LEFT_new1_alt1 +GATCCTAACAATGAAGAAGGTGGTC +>H3N2NA_8_LEFT_new1_alt2 +AACCCTAACAATGAAAAAGGTGGTC +>H3N2NA_8_LEFT_new1_alt3 +AATCCTAACAATGAAGAAGGTGGTC +>H3N2NA_8_LEFT_new1_alt4 +TGAACCCTAACAATGAAAAAGGTGATC +>H3N2NA_9_LEFT +GAAGGCTGGTCCAACCCTAA +>H3N2NA_9_LEFT_alt1 +TTGAAGGCTGGTCCAACTCTAA +>H3N2NA_9_LEFT_alt2 +AAGGCTGGTCCAACCCCAA +>H3N2NA_9_LEFT_alt3 +GAAGGCTGGTCCAATCCCAA +>H3N2NA_last_LEFT_new2 +GCTGCATCAATCGGTGCTTT +>A-M-UniF +GGGGGGAGCAAAAGCAGGTAG +>H3N2MP_re_2_LEFT_new1_alt1 +AGGCTCTCATGGAATGGCTAAAG +>H3N2MP_re_2_LEFT_new1_alt2 +AGGCTCTCATGGAATGGTTAAAG +>H3N2MP_3_LEFT +TCAAACTGTATAGGAAACTTAAGAGGGA +>H3N2MP_3_LEFT_alt1 +GTTAAACTGTATAGGAAACTTAAGAGGGA +>H3N2MP_3_LEFT_alt2 +AGTTAAACTGTATAGGAAACTTAAGAGAGA +>H3N2MP_4_LEFT +GGGGCTGTAACCACTGAAGT +>H3N2MP_5_LEFT +TGGATCAAGTGAGCAGGCAG +>H3N2MP_5_LEFT_alt1 +CTGGATCAAGTGAGCAAGCAG +>H3N2MP_5_LEFT_alt2 +CTGGATCAAGTGAACAGGCAG +>H3N2MP_6_LEFT +TTGCAGACCTATCAGAAACGAATG +>H3N2MP_6_LEFT_alt1 +GAAAATTTACAGACCTATCAGAAACGAATG +>H3N2MP_re_last_LEFT_new1 +ATTGGGATCTTGCACTTGATATTGT +>PrefixNX_1 +AGATGTGTATAAGAGACAG +>Trans1 +TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG +>Trans2 +GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG \ No newline at end of file diff --git a/bin/adapters/FLU_tile_right_h1.fa b/bin/adapters/FLU_tile_right_h1.fa new file mode 100644 index 0000000..236eb81 --- /dev/null +++ b/bin/adapters/FLU_tile_right_h1.fa @@ -0,0 +1,98 @@ +>H1pdmHA_0_RIGHT +GGCTACCCCTCTTAGTTTGCA +>H1pdmHA_0_RIGHT_alt1 +GGGCTACTCCTCTTAGTTTGCA +>H1pdmHA_1_RIGHT_me +GATTTCCCAGGATCCAGCCA +>H1pdmHA_2_RIGHT +ACCTTTCAAATGATGACACTGAGC +>H1pdmHA_3_RIGHT +TGAGCTTTGGGTATGAATTTCCTTT +>H1pdmHA_3_RIGHT_alt1 +TGAGCTTTGGGTATGAATTCCCTTT +>H1pdmHA_3_RIGHT_alt2 +GGTTGATCTTTGGGTATGATTTTCCTTT +>H1pdmHA_4_RIGHT +TGCTGTATCTTGATGACCCCAC +>H1pdmHA_4_RIGHT_alt1 +TGCTGTATCTTGATGTCCCCAC +>H1pdmHA_5_RIGHT +TCCAGTTGCTTCGAATGTTATTTTGT +>H1pdmHA_6_RIGHT_new_alt1 +TGAAATGGGAGGCTGGTGTT +>H1pdmHA_6_RIGHT_new_alt2 +TTCTGAAATGGGAGACTGGTGTT +>H1pdmHA_6_RIGHT_new_alt3 +TTCTGAAATGGAAGGCTGGTGTT +>H1pdmHA_7_RIGHT +ACCATCCATCTACCATCCCTGT +>H1pdmHA_7_RIGHT_alt1 +GTACCATCCATCTACCATTCCTGT +>H1pdmHA_8_RIGHT +GGTGGTTGAACTCTTTACCCAC +>H1pdmHA_8_RIGHT_alt1 +AAGGTGGTTGAACTCTTTACCAAC +>H1pdmHA_9_RIGHT +CTTCACATTTGAATCGTGATAGTCCA +>H1pdmHA_9_RIGHT_alt1 +TTCACATTTGAATCGTGGTAGTCCA +>H1pdmHA_10_RIGHT +TGAGTATTTTGGGTAGTCATAAGTCCC +>H1pdmHA_11_RIGHT +CACATCCAGAAACTGATTGCCC +>H1pdmHA_11_RIGHT_alt1 +ACATCCAGAAGCTGATTGCCC +>A-HA-R-new +CGGGTTATTAGTAGAAACAAGGGTG +>H1pdmNA_0_RIGHT +ACATATGTCTGATTTACCCAAGTATTGTT +>H1pdmNA_0_RIGHT_alt1 +ATGTCTGATTTACCCAAGTGTTGTT +>H1pdmNA_2_RIGHT +TGGTTCCCTTATGACAAACACATC +>H1pdmNA_3_RIGHT +TTGTATGGAGAGGGAACTTCACC +>H1pdmNA_4_RIGHT_me +CACATTCAGACTCTTGTGTTCTCAA +>H1pdmNA_5_RIGHT +CACTAGAATCAGGATAACAGGAGCA +>H1pdmNA_5_RIGHT_alt1 +ACTAGAATCAGGGTAACAGGAGCA +>H1pdmNA_5_RIGHT_alt2 +CTAGAGTCGGGATAACAGGAGCA +>H1pdmNA_6_RIGHT +CATTAGGGCGTGGATTGTCTCC +>H1pdmNA_7_RIGHT +TCCCAAATCATCTCAAAACCTTTTCT +>H1pdmNA_7_RIGHT_alt1 +TCCCAAATCATCTCAAAACCTTTCCT +>H1pdmNA_7_RIGHT_alt2 +TCCCAAATCATTTCAAAACCTTTTCT +>H1pdmNA_8_RIGHT_new1 +TCAACCCAGAAGCAAGGTCTTAT +>H1pdmNA_last_RIGHT +TTAGTAGAAACAAGGAGTTTTTTGAAC +>H1pdmMP_0_RIGHT +GGTGAGCGTGAACACAAATCC +>H1pdmMP_1_RIGHT +TTGGCCCCATGGAACGTTATTT +>H1pdmMP_1_RIGHT_alt1 +CTTGGCCCCATGGAATGTTATTT +>H1pdmMP_2_RIGHT_me +TATATGAGGCCCATGCAACTGG +>H1pdmMP_2_RIGHT_me_alt1 +TATATGAGGCCCATGCAACTTG +>H1pdmMP_3_RIGHT_me +ACCATTCTGTTCTCATGCCTGA +>H1pdmMP_3_RIGHT_me_alt1 +CACCATTCTGTTTTCATGCCTGA +>H1pdmMP_4_RIGHT_me +CAGCACTGGAGCTAGGATGAGT +>H1pdmMP_5_RIGHT_new1 +TCCCAATGATGTTTGCTGCAA +>H1pdmMP_6_RIGHT +CGTCAACATCCACAGCACTCT +>H1pdmMP_6_RIGHT_alt1 +ATCGTCAACATCTACAGCACTCT +>A-M-UniR +CCGGGTTATTAGTAGAAACAAGGTAG \ No newline at end of file diff --git a/bin/adapters/FLU_tile_right_h3.fa b/bin/adapters/FLU_tile_right_h3.fa new file mode 100755 index 0000000..d2d4b48 --- /dev/null +++ b/bin/adapters/FLU_tile_right_h3.fa @@ -0,0 +1,140 @@ +>H3N2HA_0_RIGHT +TTGAGGAATTCTGAACCAGCTCA +>H3N2HA_0_RIGHT_alt1 +CTATTGAGGAATTCTGAACCAACTCA +>H3N2HA_0_RIGHT_alt2 +GAGGAACTCTGAACCAGCTCA +>H3N2HA_0_RIGHT_alt3 +CCTATTGAGGAATTCTGAACCAATTCA +>H3N2HA_0_RIGHT_alt4 +GAGGAGTTCTGAACCAGCTCA +>H3N2HA_1_RIGHT +CTTCGTTCAACAAAAAGGTCCCA +>H3N2HA_1_RIGHT_alt1 +GCGTTCAACAAAAAGGTCCCA +>H3N2HA_1_RIGHT_alt2 +CCGTTCAACGAAAAGGTCCCA +>H3N2HA_2_RIGHT_me +GCTTTCATTTTTAAACTCCAGTGTGC +>H3N2HA_2_RIGHT_me_alt1 +CTTTCATTGTTAAACTCCAGTGTGC +>H3N2HA_3_RIGHT_new1_alt1 +TCCTTGTTTGGCATAGTCACGT +>H3N2HA_3_RIGHT_new1_alt2 +GTTCCTTGTTTGGCATAGTCACAT +>H3N2HA_3_RIGHT_new1_alt3 +GTTCATTGTTTGGCATAGTCACGT +>H3N2HA_4_RIGHT2 +CTGTGATTCTTCCTGATGATTGAGC +>H3N2HA_4_RIGHT2_alt1 +CTGTGATTCTTCCTGATGATTGTGC +>H3N2HA_4_RIGHT2_alt2 +GTGATTCTTCCTGATGCTTGAGC +>H3N2HA_4_RIGHT2_alt3 +GTGATTCTTCCTGATGGTTGAGC +>H3N2HA_4_RIGHT2_alt4 +GTGATTCTTCCTGATGATCGAGC +>H3N2HA_4.5_RIGHT +ATGTCTCCCGGTTTTACTATTGTCC +>H3N2HA_4.5_RIGHT_alt1 +ATGTCTCCTGGTTTTACTATTGTCC +>H3N2HA_5_RIGHT +CCAATGGGTGCATCTGATCTCA +>H3N2HA_5_RIGHT_new2_alt1 +CATTATTGAGCTTTTCCCACTTCGT +>H3N2HA_5_RIGHT_new2_alt2 +TCATTATTGAGCTTTTCCCACTTTGT +>H3N2HA_6_RIGHT +GGTACATTTCGCATTCCTGTTGC +>H3N2HA_6_RIGHT_alt1 +GTACATTTCGCATCCCTGTTGC +>H3N2HA_7_RIGHT +TGCTGCTTGAGTGCTTTTGAGA +>H3N2HA_7_RIGHT_alt1 +TGCTGCTTGAGTGCTTTTAAGA +>H3N2HA_8_RIGHT +CGTTGTATGACCAGAGATCTATTTTAGT +>H3N2HA_8_RIGHT_alt1 +CGTTGTATGACCAGAGATCTATTTTTGT +>H3N2HA_9_RIGHT_new1 +CAGCATTTTCCCTCAGTTGCT +>H3N2HA_10_RIGHT +CTTTGTACCCTGACTTCAGCTCA +>H3N2HA_10_RIGHT_alt1 +TCTTTGTATCCTGACTTCAGCTCA +>A-HA-UniR +CCGGGTTATTAGTAGAAACAAGGGTG +>H3N2NA_0_RIGHT_new_alt1 +GTTTCTTTCTATTATTGTTGGTTCACACAG +>H3N2NA_0_RIGHT_new_alt2 +GTTTCTTTCTATTATTGTTGGTTCACATAG +>H3N2NA_0_RIGHT_new_alt3 +TTTCTTTCTATCATTGTTGGTTCACACAG +>H3N2NA_0_RIGHT_new_alt4 +GTTTCTTTCTATTATAGTTGGTTCACACAG +>H3N2NA_2_RIGHT_new1 +CTCTTGTCACCCAGATGTCCC +>H3N2NA_3_RIGHT +GGAACACCCAACTCATTCATCAAT +>H3N2NA_3_RIGHT_alt1 +GGAACACCCAACTCACTCATCAAT +>H3N2NA_3_RIGHT_alt2 +GGAACACCCAATTCACTCATCAAT +>H3N2NA_3_RIGHT_alt3 +AAAGGAACACCTAACTCATTCATCAAT +>H3N2NA_3_RIGHT_alt4 +AAAGGAACACCCAACTCATTCATTAAT +>H3N2NA_4_RIGHT +CAACACTATCTACAAGCCTCCCA +>H3N2NA_4_RIGHT_alt1 +AAACAATACTATCTACAAGCCTCCCA +>H3N2NA_5_RIGHT +CCCCTCCTCAATGAATAGTATTTTAGTATC +>H3N2NA_5_RIGHT_alt1 +CCCCTCCTCAATGAATAGTATTTTAGTAAC +>H3N2NA_5_RIGHT_alt2 +CCCTCCTCAATGAATAGTATTTTAGTGTC +>H3N2NA_5_RIGHT_alt3 +TTCCCCTCCTCAATGAATAATATTTTAGTATC +>H3N2NA_6_RIGHT +GAGCCTTTCCAGTTGTCTCTGC +>H3N2NA_6_RIGHT_alt1 +GATCCTTTCCAGTTGTCTCTGC +>H3N2NA_7_RIGHT_me +GCCCAGCCTTTCACTCCAT +>H3N2NA_8_RIGHT_new2 +ACCAGAATAACCGGACCTATCAC +>H3N2NA_9_RIGHT +TGAGCCTGTTCCATATGTACCTG +>A-NA-UniR +CCGGGTTATTAGTAGAAACAAGGAGT +>H3N2MP_0_RIGHT +GGTGAGCGTGAAAACAAACCC +>H3N2MP_0_RIGHT_alt1 +GTGAGCGTGAACACAAACCC +>H3N2MP_0_RIGHT_alt2 +GGTGAGCGTGAACACAAATCC +>H3N2MP_re_2_RIGHT_new2_alt1 +ATTTCTTTGGCCCCGTGGAA +>H3N2MP_re_2_RIGHT_new2_alt2 +CTATTTCTTTGGCCCCATGGAAC +>H3N2MP_3_RIGHT +TGGATTGGTTGTTGCCACCAT +>H3N2MP_4_RIGHT +CATCTGCCTGGCCTGACTAG +>H3N2MP_re_5_RIGHT_new2_alt1 +TATTCGCGGCAACAACAAGC +>H3N2MP_re_5_RIGHT_new2_alt2 +GATATTCGCGGCAACAATAAGC +>H3N2MP_6_RIGHT +GCTGTTCCTTTCGATATTCTTCCC +>H3N2MP_6_RIGHT_alt1 +CTGTTCCTTTCGGTATTCTTCCC +>H3N2MP_last_RIGHT +GACCAGTAGAAACAAGGTAGTTTTT +>PrefixNX_2 +AGATGTGTATAAGAGACAG +>Trans1_rc +CTGTCTCTTATACACATCTGACGCTGCCGACGA +>Trans2_rc +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC \ No newline at end of file diff --git a/bin/adapters/NexteraPE-PE.fa b/bin/adapters/NexteraPE-PE.fa deleted file mode 100644 index a986757..0000000 --- a/bin/adapters/NexteraPE-PE.fa +++ /dev/null @@ -1,12 +0,0 @@ ->PrefixNX/1 -AGATGTGTATAAGAGACAG ->PrefixNX/2 -AGATGTGTATAAGAGACAG ->Trans1 -TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG ->Trans1_rc -CTGTCTCTTATACACATCTGACGCTGCCGACGA ->Trans2 -GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG ->Trans2_rc -CTGTCTCTTATACACATCTCCGAGCCCACGAGAC \ No newline at end of file diff --git a/bin/adapters/RSV_f.fa b/bin/adapters/RSV_left.fa similarity index 100% rename from bin/adapters/RSV_f.fa rename to bin/adapters/RSV_left.fa diff --git a/bin/adapters/RSV_r.fa b/bin/adapters/RSV_right.fa similarity index 100% rename from bin/adapters/RSV_r.fa rename to bin/adapters/RSV_right.fa diff --git a/bin/adapters/RSV_tile_left.fa b/bin/adapters/RSV_tile_left.fa new file mode 100755 index 0000000..e7252b4 --- /dev/null +++ b/bin/adapters/RSV_tile_left.fa @@ -0,0 +1,20 @@ +>RSVS1_01F +ACGCGAAAAAATGCGTACTACAAAC +>RSV-F1-R1876 +CTGMACCATAGGCATTCATAAACA +>RSV_F4_F2925 +GCTATGGCAAGACTYAGGAATG +>RSV_F5_R6247.6 +TTGAGRTCTAACACTTTGCTGGT +>RSV_Seg3_7215F +TGATGCATCAATATCTCAAGTCA +>RSV_F3_F1563 +ATGGGAGARGTRGCTCCAGAATA +>RSV_F5_F4336.4 +AGCAAATTYTGGCCYTAYTTTAC +>RSV_F6-new-B-R7916 +CTCATAGCAACACATGCTGATTG +>RSV_F6-new-A-R7911.1 +GAGTTTGCTCATGGCAACACAT +>RSV_Seg4_10959F +TGGACCATWGAAGCYATATCA diff --git a/bin/adapters/RSV_tile_right.fa b/bin/adapters/RSV_tile_right.fa new file mode 100755 index 0000000..3c50bd1 --- /dev/null +++ b/bin/adapters/RSV_tile_right.fa @@ -0,0 +1,6 @@ +>RSV_Seg3_11165R +GRCCTATDCCTGCATACTC +>RSVS1_4017R +CGTGTAGCTGTRTGYTTCCAA +>RSV_Seg4_15333R +AGTGTCAAAAACTAATRTCTCGT diff --git a/bin/adapters/TruSeq2-PE.fa b/bin/adapters/TruSeq2-PE.fa deleted file mode 100644 index 93ce633..0000000 --- a/bin/adapters/TruSeq2-PE.fa +++ /dev/null @@ -1,16 +0,0 @@ ->PrefixPE/1 -AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PrefixPE/2 -CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT ->PCR_Primer1 -AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PCR_Primer1_rc -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT ->PCR_Primer2 -CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT ->PCR_Primer2_rc -AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG ->FlowCell1 -TTTTTTTTTTAATGATACGGCGACCACCGAGATCTACAC ->FlowCell2 -TTTTTTTTTTCAAGCAGAAGACGGCATACGA \ No newline at end of file diff --git a/bin/adapters/TruSeq2-SE.fa b/bin/adapters/TruSeq2-SE.fa deleted file mode 100644 index 9ff36ad..0000000 --- a/bin/adapters/TruSeq2-SE.fa +++ /dev/null @@ -1,6 +0,0 @@ ->TruSeq2_SE -AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG ->TruSeq2_PE_f -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT ->TruSeq2_PE_r -AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG \ No newline at end of file diff --git a/bin/adapters/TruSeq3-PE-2.fa b/bin/adapters/TruSeq3-PE-2.fa deleted file mode 100644 index b205511..0000000 --- a/bin/adapters/TruSeq3-PE-2.fa +++ /dev/null @@ -1,12 +0,0 @@ ->PrefixPE/1 -TACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PrefixPE/2 -GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT ->PE1 -TACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PE1_rc -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA ->PE2 -GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT ->PE2_rc -AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \ No newline at end of file diff --git a/bin/adapters/TruSeq3-PE.fa b/bin/adapters/TruSeq3-PE.fa deleted file mode 100644 index e370ca1..0000000 --- a/bin/adapters/TruSeq3-PE.fa +++ /dev/null @@ -1,4 +0,0 @@ ->PrefixPE/1 -TACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PrefixPE/2 -GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT \ No newline at end of file diff --git a/bin/adapters/TruSeq3-SE.fa b/bin/adapters/TruSeq3-SE.fa deleted file mode 100644 index be2a32d..0000000 --- a/bin/adapters/TruSeq3-SE.fa +++ /dev/null @@ -1,4 +0,0 @@ ->TruSeq3_IndexedAdapter -AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC ->TruSeq3_UniversalAdapter -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA \ No newline at end of file diff --git a/bin/adapters/flu/FLU_G.fa b/bin/adapters/flu/FLU_G.fa deleted file mode 100644 index adc3796..0000000 --- a/bin/adapters/flu/FLU_G.fa +++ /dev/null @@ -1,12 +0,0 @@ ->RSV-F1-R1876 -XGTTTGTAGTACGCATTTTTTCGCGT ->RSV_F5_R6247.6 -XCATTCCTRAGTCTTGCCATAGC ->RSV_Seg3_11165R -XTGACTTGAGATATTGATGCATCA ->RSVS1_4017R -XTATTCTGGAGCYACYTCTCCCAT ->RSV_F6-new-B-R7916 -XGTAAARTARGGCCARAATTTGCT ->RSV_Seg4_15333R -XTGATATRGCTTCWATGGTCCA \ No newline at end of file diff --git a/bin/adapters/flu/FLU_a.fa b/bin/adapters/flu/FLU_a.fa deleted file mode 100644 index 9b7b569..0000000 --- a/bin/adapters/flu/FLU_a.fa +++ /dev/null @@ -1,14 +0,0 @@ ->RSVS1_01F -TGTTTATGAATGCCTATGGTKCAGX ->RSV_F4_F2925 -ACCAGCAAAGTGTTAGAYCTCAAX ->RSV_Seg3_7215F -GAGTATGCAGGHATAGGYCX ->RSV_F3_F1563 -TTGGAARCAYACAGCTACACGX ->RSV_F5_F4336.4 -CAATCAGCATGTGTTGCTATGAGX ->RSV_F5_F4336.4.2 -ATGTGTTGCCATGAGCAAACTCX ->RSV_Seg4_10959F -ACGAGAYATTAGTTTTTGACACTX \ No newline at end of file diff --git a/bin/adapters/flu/FLU_f.fa b/bin/adapters/flu/FLU_f.fa deleted file mode 100644 index e10484a..0000000 --- a/bin/adapters/flu/FLU_f.fa +++ /dev/null @@ -1,8 +0,0 @@ ->PrefixNX_1 -AGATGTGTATAAGAGACAG ->Trans1 -TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG ->Trans2 -GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG ->Uni13_Inf_1 -CGGGTTATTAGTAGAAACAAGG \ No newline at end of file diff --git a/bin/adapters/flu/FLU_r.fa b/bin/adapters/flu/FLU_r.fa deleted file mode 100644 index 8b54531..0000000 --- a/bin/adapters/flu/FLU_r.fa +++ /dev/null @@ -1,8 +0,0 @@ ->PrefixNX_2 -AGATGTGTATAAGAGACAG ->Trans1_rc -CTGTCTCTTATACACATCTGACGCTGCCGACGA ->Trans2_rc -CTGTCTCTTATACACATCTCCGAGCCCACGAGAC ->Uni12_Inf_3 -GGGGGGAGCGAAAGCAGG \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_G.fa b/bin/adapters/rsv/RSV_G.fa deleted file mode 100644 index adc3796..0000000 --- a/bin/adapters/rsv/RSV_G.fa +++ /dev/null @@ -1,12 +0,0 @@ ->RSV-F1-R1876 -XGTTTGTAGTACGCATTTTTTCGCGT ->RSV_F5_R6247.6 -XCATTCCTRAGTCTTGCCATAGC ->RSV_Seg3_11165R -XTGACTTGAGATATTGATGCATCA ->RSVS1_4017R -XTATTCTGGAGCYACYTCTCCCAT ->RSV_F6-new-B-R7916 -XGTAAARTARGGCCARAATTTGCT ->RSV_Seg4_15333R -XTGATATRGCTTCWATGGTCCA \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_a.fa b/bin/adapters/rsv/RSV_a.fa deleted file mode 100644 index 9b7b569..0000000 --- a/bin/adapters/rsv/RSV_a.fa +++ /dev/null @@ -1,14 +0,0 @@ ->RSVS1_01F -TGTTTATGAATGCCTATGGTKCAGX ->RSV_F4_F2925 -ACCAGCAAAGTGTTAGAYCTCAAX ->RSV_Seg3_7215F -GAGTATGCAGGHATAGGYCX ->RSV_F3_F1563 -TTGGAARCAYACAGCTACACGX ->RSV_F5_F4336.4 -CAATCAGCATGTGTTGCTATGAGX ->RSV_F5_F4336.4.2 -ATGTGTTGCCATGAGCAAACTCX ->RSV_Seg4_10959F -ACGAGAYATTAGTTTTTGACACTX \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_f.fa b/bin/adapters/rsv/RSV_f.fa deleted file mode 100644 index a470711..0000000 --- a/bin/adapters/rsv/RSV_f.fa +++ /dev/null @@ -1,18 +0,0 @@ ->PrefixNX/1 -XAGATGTGTATAAGAGACAG ->Trans1 -XTCGTCGGCAGCGTCAGATGTGTATAAGAGACAG ->Trans2 -XGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG ->RSVS1_01F -XACGCGAAAAAATGCGTACTACAAAC ->RSV_F4_F2925 -XGCTATGGCAAGACTYAGGAATG ->RSV_Seg3_7215F -XTGATGCATCAATATCTCAAGTCA ->RSV_F3_F1563 -XATGGGAGARGTRGCTCCAGAATA ->RSV_F5_F4336 -XAGCAAATTYTGGCCYTAYTTTAC ->RSV_Seg4_10959F -XTGGACCATWGAAGCYATATCA \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_f.fa.old b/bin/adapters/rsv/RSV_f.fa.old deleted file mode 100644 index 76715ee..0000000 --- a/bin/adapters/rsv/RSV_f.fa.old +++ /dev/null @@ -1,14 +0,0 @@ ->PrefixNX/1 -AGATGTGTATAAGAGACAG ->Trans1 -TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG ->Trans2 -GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG ->RSV_v2_49F -GGCAAATAAGAATTTGATAAGTAC ->RSV_v2_3944F -GCCACARAGTCAATTYATAGTAG ->RSV_Seg3_7215F -TGATGCATCAATATCTCAAGTCA ->RSV_Seg4_10959F -TGGACCATWGAAGCYATATCA \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_r.fa b/bin/adapters/rsv/RSV_r.fa deleted file mode 100644 index 93efd73..0000000 --- a/bin/adapters/rsv/RSV_r.fa +++ /dev/null @@ -1,20 +0,0 @@ ->PrefixNX/2 -AGATGTGTATAAGAGACAG ->Trans1_rc -CTGTCTCTTATACACATCTGACGCTGCCGACGA ->Trans2_rc -CTGTCTCTTATACACATCTCCGAGCCCACGAGAC ->RSV-F1-R1876 -CTGMACCATAGGCATTCATAAACAX ->RSV_F5_R6247 -TTGAGRTCTAACACTTTGCTGGTX ->RSV_Seg3_11165R -GRCCTATDCCTGCATACTCX ->RSVS1_4017R -CGTGTAGCTGTRTGYTTCCAAX ->RSV_F6-new-B-R7916 -CTCATAGCAACACATGCTGATTGX ->RSV_F6-new-A-R7911 -GAGTTTGCTCATGGCAACACATX ->RSV_Seg4_15333R -AGTGTCAAAAACTAATRTCTCGTX \ No newline at end of file diff --git a/bin/adapters/rsv/RSV_r.fa.old b/bin/adapters/rsv/RSV_r.fa.old deleted file mode 100644 index fcf6b74..0000000 --- a/bin/adapters/rsv/RSV_r.fa.old +++ /dev/null @@ -1,14 +0,0 @@ ->PrefixNX/2 -AGATGTGTATAAGAGACAG ->Trans1_rc -CTGTCTCTTATACACATCTGACGCTGCCGACGA ->Trans2_rc -CTGTCTCTTATACACATCTCCGAGCCCACGAGAC ->RSV_v2_4049R -TTTGATTGMAAAWCGTGTAGCTG ->RSV_Seg4_15333R -AGTGTCAAAAACTAATRTCTCGT ->RSV_Seg3_11165R -GRCCTATDCCTGCATACTC ->RSV_v2_7528R -TGTRACTGGTGTGYTTYTGG \ No newline at end of file diff --git a/snakefile b/snakefile index 17bbc3e..342cae0 100755 --- a/snakefile +++ b/snakefile @@ -1,7 +1,7 @@ ############################################################################# # # # wfi (WHO-FLU-IRMA) # -# pipeline for the assembly of illumina/ont # +# One Big IRMA Wrapper in Snakemake # # # # Created by Ammar Aziz # # # @@ -12,7 +12,7 @@ # DO NOT TOUCH ANYTHING # ############################################################################# -version = "0.3.4" +version = "0.3.5" import subprocess, sys, os, glob, shutil from time import sleep @@ -36,8 +36,8 @@ org = config["organism"].upper() seq_technology = config["technology"].lower() secondary_assembly = config["secondary_assembly"] subset = config["subset"] - -# set organism and gene segements (influenza) to keep +trim_prog = config["trim_prog"] +trim_org = config["trim_org"] if org == 'FLU': if seq_technology == 'illumina': @@ -85,34 +85,22 @@ elif org == 'RSV': else: raise ValueError("Check config file for 'organism' setting. Options are: FLU or RSV") + ## Message ------------------------------------------------------------------------ +onstart: + print("Run mode: " + run_mode) + print("Sequence Technology: " + seq_technology) + print("Organism: " + org) + print("IRMA Module: " + irma_module) + print("Secondary Assembly: " + str(secondary_assembly)) + print("Trimming using: " + str(trim_prog)) + print("\n") + onsuccess: print("wfi has successfully completed!") onerror: print("oops wfi has run into an issue. Look above, If rule SummaryReport failed there is no need to worry!") -## Functions ------------------------------------------------------------------- - -def fixNames(fafile, name, org): - sample_name, sample_number = name.split("_") - - if org == 'FLU': - res = "" - listSeg = {"HA":"4","MP":"7","NA":"6","NP":"5","NS":"8","PA":"3","PB1":"2","PB2":"1"} - for index, record in enumerate(SeqIO.parse(fafile, "fasta")): - gene = record.id.split("_")[1] - res += ">" + sample_name + "." + listSeg[gene] + "\n" + str(record.seq) + "\n" - return(res) - - elif org == 'RSV': - res = "" - for index, record in enumerate(SeqIO.parse(fafile, "fasta")): - res = ">" + sample_name + "\n" + str(record.seq) + "\n" - return(res) - - else: - sys.exit("Org not found. Error: fixNames") - ## Sequencing technology ------------------------------------------------------------------------ if seq_technology == 'illumina': SAMPLE_NAME, SAMPLE_NUMBER, lane_number, PAIR = glob_wildcards(IFQ + "/{sample_name}_{sample_number}_L{lane_number}_{pair}_001.fastq.gz") @@ -128,25 +116,29 @@ elif seq_technology == 'pgm': run_mode = 'single' ## Mode ------------------------------------------------------------------------ - +if run_mode not in ['paired', 'single']: + sys.exit("Configuration incorrect, check 'run_mode' it must be: paired or single") if run_mode == 'paired': - rule_mode = [expand(workspace + "qualtrim/{sample}.R1.fastq", sample = SAMPLES), - expand(workspace + "qualtrim/{sample}.R2.fastq", sample = SAMPLES)] -elif run_mode == 'single': - rule_mode = [expand(workspace + "qualtrim/{sample}.fastq", sample = SAMPLES)] - -## Message ------------------------------------------------------------------------ -print("Run mode: " + run_mode) -print("Sequence Technology: " + seq_technology) -print("Organism: " + org) -print("IRMA Module: " + irma_module) -print("Secondary Assembly: " + str(secondary_assembly)) -print("\n") -## Rules ------------------------------------------------------------------------ + rule_mode = [ + expand(workspace + "qualtrim/{sample}.R1.fastq", sample = SAMPLES), + expand(workspace + "qualtrim/{sample}.R2.fastq", sample = SAMPLES) + ] +if run_mode == 'single': + rule_mode = [ + expand(workspace + "qualtrim/{sample}.fastq", sample = SAMPLES) + ] + +## Trimming --------------------------------------------------------------------- +if trim_prog not in ['standard', 'tile']: + sys.exit("Configuration incorrect, check 'trim_prog' it must be: standard or tile") +if trim_org not in ['h1', 'h3']: + sys.exit("Configuration incorrect, check 'trim_org' it must be: h1 or h3. bvic is not supported") + +## Rules ------------------------------------------------------------------------ rule all: input: - # filter + # singe or paired rule_mode, # status expand(workspace + "status/filter_{sample}.txt", sample = SAMPLES), @@ -155,33 +147,104 @@ rule all: join(workspace + "status/plotting_complete.txt") -if run_mode == 'paired': - # Filter - rule filter_paired: - input: - faR1 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R1"]), - faR2 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R2"]) - output: - R1out = workspace + "qualtrim/{sample}.R1.fastq", - R2out = workspace + "qualtrim/{sample}.R2.fastq", - status = workspace + "status/filter_{sample}.txt" - params: - Fadapter = f"bin/adapters/{org}_f.fa", - Radapter = f"bin/adapters/{org}_r.fa" - threads: 2 - message: "Filtering and trimming {input.faR1} reads." - log: workspace + "logs/trim_{sample}.txt" - shell:""" - cutadapt {input.faR1} {input.faR2} \ - -j {threads} \ - -g file:{params.Fadapter} \ - -A file:{params.Radapter} \ - -o {output.R1out} \ - -p {output.R2out} \ - --report full 1> {log} - touch {output.status} - """ +if run_mode == 'paired': + # Filter standard + if trim_prog == 'standard': + rule filter_std: + input: + faR1 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R1"]), + faR2 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R2"]) + output: + R1out = workspace + "qualtrim/{sample}.R1.fastq", + R2out = workspace + "qualtrim/{sample}.R2.fastq", + status = workspace + "status/filter_{sample}.txt" + params: + Fadapter = f"bin/adapters/{org}_left.fa", + Radapter = f"bin/adapters/{org}_right.fa" + threads: 2 + message: "Filtering and trimming {input.faR1} reads." + log: workspace + "logs/trim_{sample}.txt" + shell:""" + cutadapt {input.faR1} {input.faR2} \ + -j {threads} \ + -g file:{params.Fadapter} \ + -A file:{params.Radapter} \ + -o {output.R1out} \ + -p {output.R2out} \ + --report full 1> {log} + + touch {output.status} + """ + # Filter tile + if trim_prog == 'tile': + rule filter_tile: + input: + faR1 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R1"]), + faR2 = expand(IFQ + "{{sample}}_L001_{pair}_001.fastq.gz", pair = ["R2"]) + output: + R1_out_left = workspace + "qualtrim/{sample}_left.R1.fastq", + R2_out_left = workspace + "qualtrim/{sample}_left.R2.fastq", + R1_out_right = workspace + "qualtrim/{sample}.R1.fastq", + R2_out_right = workspace + "qualtrim/{sample}.R2.fastq", + status = workspace + "status/filter_{sample}.txt" + params: + Fadapter = f"bin/adapters/{org}_tile_left_{trim_org}.fa", + Radapter = f"bin/adapters/{org}_tile_right_{trim_org}.fa", + stats1 = workspace + "logs/trimStats1_{sample}.txt", + stats2 = workspace + "logs/trimStats2_{sample}.txt", + refstats1 = workspace + "logs/trimRefStats1_{sample}.txt", + refstats2 = workspace + "logs/trimRefStats2_{sample}.txt", + k = 9, + mink = 3, + restrict = 30, + hdist = 1 + threads: 2 + message: "Filtering and trimming {input.faR1} reads." + log: workspace + "logs/trim_{sample}.txt" + shell:""" + bbduk.sh in={input.faR1} \ + in2={input.faR2} \ + ktrim=l \ + mm=f \ + hdist={params.hdist} \ + rcomp=t \ + ref={params.Fadapter} \ + ordered=t \ + minlen=0 \ + minlength=0 \ + trimq=0 \ + k={params.k} \ + mink={params.mink} \ + threads={threads} \ + restrictleft={params.restrict} \ + out={output.R1_out_left} \ + out2={output.R2_out_left} \ + stats={params.stats1} \ + statscolumns=5 2>> {log} + + bbduk.sh in={output.R1_out_left} \ + in2={output.R2_out_left} \ + ktrim=r \ + mm=f \ + hdist={params.hdist} \ + rcomp=t \ + ref={params.Radapter} \ + ordered=t \ + minlen=0 \ + minlength=0 \ + trimq=0 \ + k={params.k} \ + mink={params.mink} \ + threads={threads} \ + restrictright={params.restrict} \ + out={output.R1_out_right} \ + out2={output.R2_out_right} \ + stats={params.stats2} \ + statscolumns=5 2>> {log} + + touch {output.status} + """ # Assembly rule irma_paired: @@ -204,7 +267,7 @@ if run_mode == 'paired': """ elif run_mode == 'single': - rule filter_single: + rule filter_std: input: single = expand(IFQ + "{{sample}}.fastq.gz"), output: @@ -243,7 +306,7 @@ elif run_mode == 'single': touch > {output.status} """ else: - sys.exit("Something went wrong with the filter+irma command") + sys.exit("Something went wrong with the filter+irma command. Check output") # rename and sort into subtypes rule renameSubtype: diff --git a/tools/auto_install.sh b/tools/auto_install.sh index 3c5712b..b579e07 100755 --- a/tools/auto_install.sh +++ b/tools/auto_install.sh @@ -169,7 +169,7 @@ conda config --add channels conda-forge title 'Creating conda environment "wfi" and installing dependancies' -DEPEND=(r-ggplot2 r-dplyr r-stringi r-stringr r-tidyr r-cowplot r-gridExtra r-optparse) +DEPEND=(r-ggplot2 r-dplyr r-tidyr r-cowplot r-gridExtra r-optparse) conda install --yes -n base -c conda-forge mamba @@ -196,10 +196,10 @@ title 'Installing wfi' message 'Attempting to download latest release of wfi' # fix the below to /releases/latest when ready -curl -s https://api.github.com/repos/ammaraziz/wfi/releases | grep "browser_download_url" | cut -d '"' -f 4 | wget -qi - +curl -s https://api.github.com/repos/ammaraziz/wfi/releases | grep "browser_download_url" | cut -d '"' -f 4 | head -n 1 | wget -qi - if [[ $? -ne 0 ]]; then oops "wget failed, trying --no-check-certificate" - curl -s https://api.github.com/repos/ammaraziz/wfi/releases | grep "browser_download_url" | cut -d '"' -f 4 | wget --no-check-certificate -qi - + curl -s https://api.github.com/repos/ammaraziz/wfi/releases | grep "browser_download_url" | cut -d '"' -f 4 | head -n 1 | wget --no-check-certificate -qi - fi # uncompress directory and install modules diff --git a/tools/summaryReport.R b/tools/summaryReport.R index 416f072..a629ff3 100755 --- a/tools/summaryReport.R +++ b/tools/summaryReport.R @@ -111,10 +111,6 @@ source("./tools/qcTable.R") main <- function() { - #print params for user - write(paste0("organism: ", opts$organism), stdout()) - write(paste0("input: ", opts$input), stdout()) - write(paste0("output: ", opts$output), stdout()) opts$organism = toupper(opts$organism) # get file names diff --git a/wfi_config.yaml b/wfi_config.yaml index 0461956..63b75cc 100644 --- a/wfi_config.yaml +++ b/wfi_config.yaml @@ -24,6 +24,17 @@ secondary_assembly: False # [illumina, ont, pgm] technology: illumina +# Trimming mode +# [standard, tile] +# standard uses cutadapt +# tile uses bbduk +trim_prog: tile +# only for trim_prog 'tile' +# specifies organism specific primers +# [h1, h3] +trim_org: h1 + + # FLU Only # [True, False] # True = ha, na, mp