From 5daf30d4e292189b7117fa350f8a45d943cf8052 Mon Sep 17 00:00:00 2001 From: ctbus Date: Tue, 11 Apr 2023 15:57:25 -0400 Subject: [PATCH 1/8] Add CI and conda env --- .github/workflows/codecov.yml | 35 ++++++++++++++++++++++ .github/workflows/linter.yml | 26 ++++++++++++++++ .github/workflows/python-publish.yml | 39 ++++++++++++++++++++++++ .github/workflows/tests.yml | 45 ++++++++++++++++++++++++++++ primertrim_env.yml | 5 ++++ 5 files changed, 150 insertions(+) create mode 100755 .github/workflows/codecov.yml create mode 100755 .github/workflows/linter.yml create mode 100644 .github/workflows/python-publish.yml create mode 100644 .github/workflows/tests.yml create mode 100755 primertrim_env.yml diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100755 index 0000000..978eb43 --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,35 @@ +name: CodeCov + +on: + pull_request: + branches: [master, main] + push: + branches: [master, main] + schedule: + - cron: "0 13 * * 1" + +jobs: + run: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - name: setup-conda + uses: s-weigand/setup-conda@v1.1.0 + with: + conda-channels: '' + - name: Install dependencies + run: | + conda env update --file primertrim_env.yaml + python -m pip install --upgrade pip + python -m pip install pytest + python -m pip install pytest-cov + python -m pip install . + - name: Run tests and collect coverage + run: pytest --cov tests/ + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 \ No newline at end of file diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100755 index 0000000..9d72b1e --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,26 @@ +name: Super-Linter + +on: + pull_request: + branches: [master, main, dev] + push: + branches: [master, main] + +jobs: + super-linter: + name: Lint Codebase + runs-on: ubuntu-latest + + steps: + - name: Checkout Code + uses: actions/checkout@v3 + + - name: Run Super-Linter + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: true + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + VALIDATE_PYTHON_BLACK: true + + FILTER_REGEX_INCLUDE: primertrim/.*|tests/.*|setup.py \ No newline at end of file diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..ec70354 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..1ad3d8e --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,45 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Tests + +on: + pull_request: + branches: [master, main, dev] + push: + branches: [master, main] + schedule: + - cron: "0 13 * * 1" + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: setup-conda + uses: s-weigand/setup-conda@v1.1.0 + with: + conda-channels: '' + - name: Install dependencies + run: | + conda env update --file primertrim_env.yaml + python -m pip install --upgrade pip + python -m pip install pytest + python -m pip install . + - name: Unit tests + run: | + pytest -vvl tests/ + - name: Other tests + if: matrix.python-version == '3.10' + run: | + echo "Other tests" \ No newline at end of file diff --git a/primertrim_env.yml b/primertrim_env.yml new file mode 100755 index 0000000..34e2aed --- /dev/null +++ b/primertrim_env.yml @@ -0,0 +1,5 @@ +name: primertrim +channels: + - bioconda +dependencies: + - vsearch \ No newline at end of file From c9efde8597cb334b510e452d1c066680741194e9 Mon Sep 17 00:00:00 2001 From: ctbus Date: Tue, 11 Apr 2023 22:20:07 -0400 Subject: [PATCH 2/8] Import troubles --- .github/workflows/codecov.yml | 2 +- .github/workflows/tests.yml | 2 +- README.md | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 978eb43..00ea7f0 100755 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -28,7 +28,7 @@ jobs: python -m pip install --upgrade pip python -m pip install pytest python -m pip install pytest-cov - python -m pip install . + python -m pip install -e . - name: Run tests and collect coverage run: pytest --cov tests/ - name: Upload coverage to Codecov diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1ad3d8e..8db37c0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 +35,7 @@ jobs: conda env update --file primertrim_env.yaml python -m pip install --upgrade pip python -m pip install pytest - python -m pip install . + python -m pip install -e . - name: Unit tests run: | pytest -vvl tests/ diff --git a/README.md b/README.md index 8d4a579..ced1232 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ Detect short primer sequences in FASTQ reads and trim the reads accordingly. ```bash git clone https://github.com/PennChopMicrobiomeProgram/primertrim.git cd primertrim -pip install -e . +conda env create -f primertrim_env.yml +conda activate primertrim +pip install . ``` ## Algorithm From 64719ab9231dc6db91150a5d8c8e4e4da0b0d9ce Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 10:05:39 -0400 Subject: [PATCH 3/8] Whoops, yml not yaml --- .github/workflows/codecov.yml | 2 +- .github/workflows/tests.yml | 2 +- primertrim/align.py | 58 +++++++++++++++++------- primertrim/command.py | 83 ++++++++++++++++++++++++----------- primertrim/dna.py | 10 +++-- primertrim/matcher.py | 46 ++++++++++--------- primertrim/trimmable_reads.py | 21 ++++++--- setup.py | 22 +++++----- tests/test_command.py | 20 ++++++--- tests/test_dna.py | 14 +++--- tests/test_matcher.py | 30 ++++++++----- tests/test_remove_primers.py | 1 - 12 files changed, 196 insertions(+), 113 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 00ea7f0..b9b17cc 100755 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -24,7 +24,7 @@ jobs: conda-channels: '' - name: Install dependencies run: | - conda env update --file primertrim_env.yaml + conda env update --file primertrim_env.yml python -m pip install --upgrade pip python -m pip install pytest python -m pip install pytest-cov diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8db37c0..07cd464 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: conda-channels: '' - name: Install dependencies run: | - conda env update --file primertrim_env.yaml + conda env update --file primertrim_env.yml python -m pip install --upgrade pip python -m pip install pytest python -m pip install -e . diff --git a/primertrim/align.py b/primertrim/align.py index 866ebb2..0164ecd 100644 --- a/primertrim/align.py +++ b/primertrim/align.py @@ -3,8 +3,21 @@ import tempfile DEFAULT_BLAST_FIELDS = [ - "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", - "qstart", "qend", "sstart", "send", "qlen", "slen", "qseq", "sseq", "qstrand", + "qseqid", + "sseqid", + "pident", + "length", + "mismatch", + "gapopen", + "qstart", + "qend", + "sstart", + "send", + "qlen", + "slen", + "qseq", + "sseq", + "qstrand", ] BLAST_FIELD_TYPES = { @@ -43,10 +56,12 @@ "qstrand": "qstrand", } + def write_fasta(f, seqs): for seq_id, seq in seqs: f.write(">{}\n{}\n".format(seq_id, seq)) + class VsearchAligner: def __init__(self, ref_seqs_fp): self.ref_seqs_fp = ref_seqs_fp @@ -58,7 +73,8 @@ def search(self, seqs, input_fp=None, output_fp=None, **kwargs): """Search seqs and return hits""" if input_fp is None: infile = tempfile.NamedTemporaryFile( - suffix=".fasta", mode="w+t", encoding="utf-8") + suffix=".fasta", mode="w+t", encoding="utf-8" + ) write_fasta(infile, seqs) infile.seek(0) input_fp = infile.name @@ -67,8 +83,7 @@ def search(self, seqs, input_fp=None, output_fp=None, **kwargs): write_fasta(f, seqs) if output_fp is None: - outfile = tempfile.NamedTemporaryFile( - suffix=".txt", mode="wt") + outfile = tempfile.NamedTemporaryFile(suffix=".txt", mode="wt") output_fp = outfile.name self._call(input_fp, output_fp, **kwargs) @@ -82,18 +97,29 @@ def _call(self, query_fp, output_fp, min_id=0.85, threads=None): userfields_arg = "+".join(BLAST_TO_VSEARCH[f] for f in self.fields) args = [ "vsearch", - "--usearch_global", query_fp, - "--minseqlength", "10", - "--mincols", "10", - "--id", id_arg, - "--wordlength", "4", - "--strand", "both", - "--maxaccepts", "4", - "--minwordmatches", "3", + "--usearch_global", + query_fp, + "--minseqlength", + "10", + "--mincols", + "10", + "--id", + id_arg, + "--wordlength", + "4", + "--strand", + "both", + "--maxaccepts", + "4", + "--minwordmatches", + "3", "--top_hits_only", - "--userfields", userfields_arg, - "--db", self.ref_seqs_fp, - "--userout", output_fp, + "--userfields", + userfields_arg, + "--db", + self.ref_seqs_fp, + "--userout", + output_fp, ] if threads is not None: threads_arg = "{:d}".format(threads) diff --git a/primertrim/command.py b/primertrim/command.py index 5a801d7..d6ff5fd 100644 --- a/primertrim/command.py +++ b/primertrim/command.py @@ -5,67 +5,98 @@ from .trimmable_reads import TrimmableReads from .matcher import ( - CompleteMatcher, PartialMatcher, AlignmentMatcher, + CompleteMatcher, + PartialMatcher, + AlignmentMatcher, ) from .dna import deambiguate def main(argv=None): p = argparse.ArgumentParser() - p.add_argument( - "primer", nargs="+", - help="Primer sequence to be trimmed") + p.add_argument("primer", nargs="+", help="Primer sequence to be trimmed") io_group = p.add_argument_group("File I/O") io_group.add_argument( - "-i", "--input-fastq", type=argparse.FileType('r'), - help="Input FASTQ file to be trimmed (default: standard input)") + "-i", + "--input-fastq", + type=argparse.FileType("r"), + help="Input FASTQ file to be trimmed (default: standard input)", + ) io_group.add_argument( - "-o", "--output-fastq", type=argparse.FileType('w'), - help="Output FASTQ file after trimming (default: standard output)") + "-o", + "--output-fastq", + type=argparse.FileType("w"), + help="Output FASTQ file after trimming (default: standard output)", + ) io_group.add_argument( - "--log", type=argparse.FileType('w'), - help="Log file of primers and location (default: not written)") + "--log", + type=argparse.FileType("w"), + help="Log file of primers and location (default: not written)", + ) io_group.add_argument( - "--min-length", type=int, default=50, + "--min-length", + type=int, + default=50, help=( "Minimum length of reads written to the output FASTQ file. " - "(default: %(default)s)")) + "(default: %(default)s)" + ), + ) complete_group = p.add_argument_group("Complete, partial matching stages") complete_group.add_argument( - "--no-revcomp", action='store_true', + "--no-revcomp", + action="store_true", help=( "Don't match the reverse complement during the complete and " - "partial matching stages")) + "partial matching stages" + ), + ) complete_group.add_argument( - "--mismatches", type=int, default=1, + "--mismatches", + type=int, + default=1, help=( "Number of mismatches to primer allowed during the complete " - "matching stage (default: %(default)s)")) + "matching stage (default: %(default)s)" + ), + ) complete_group.add_argument( - "--min-partial", type=int, default=8, + "--min-partial", + type=int, + default=8, help=( "Minimum length of match during the partial matching stage " - "(default: %(default)s)")) + "(default: %(default)s)" + ), + ) alignment_group = p.add_argument_group("Alignment matching stage") alignment_group.add_argument( - "--alignment", action="store_true", - help="Activate the alignment matching stage") + "--alignment", action="store_true", help="Activate the alignment matching stage" + ) alignment_group.add_argument( "--alignment-dir", - help="Directory for alignment files (default: temp directory)") + help="Directory for alignment files (default: temp directory)", + ) alignment_group.add_argument( - "--threads", type=int, + "--threads", + type=int, help=( "Number of CPU threads to use during the alignment stage " - "(default: all the threads)")) + "(default: all the threads)" + ), + ) alignment_group.add_argument( - "--align_id", type=float, default=0.85, + "--align_id", + type=float, + default=0.85, help=( "Minimum percent identity to consider a primer match in vsearch alignment." - "(default: %(default)s)")) + "(default: %(default)s)" + ), + ) args = p.parse_args(argv) if args.input_fastq is None: @@ -82,7 +113,7 @@ def main(argv=None): matchers = [ CompleteMatcher(queryset, args.mismatches, not args.no_revcomp), PartialMatcher(queryset, args.min_partial, not args.no_revcomp), - ] + ] if args.alignment: if args.alignment_dir: diff --git a/primertrim/dna.py b/primertrim/dna.py index a429a53..8a640aa 100644 --- a/primertrim/dna.py +++ b/primertrim/dna.py @@ -1,9 +1,10 @@ import itertools + def reverse_complement(seq): rc = [COMPLEMENT_BASES[x] for x in seq] rc.reverse() - return ''.join(rc) + return "".join(rc) AMBIGUOUS_BASES = { @@ -22,7 +23,7 @@ def reverse_complement(seq): "V": "CAG", "D": "TAG", "N": "TCAG", - } +} # Ambiguous base codes for all bases EXCEPT the key @@ -31,7 +32,7 @@ def reverse_complement(seq): "C": "D", "A": "B", "G": "H", - } +} def deambiguate(seq): @@ -44,7 +45,8 @@ def deambiguate(seq): "C": "G", "A": "T", "G": "C", - } +} + def partial_seqs_left(seq, min_length): if min_length < len(seq): diff --git a/primertrim/matcher.py b/primertrim/matcher.py index faaec2f..c7db111 100644 --- a/primertrim/matcher.py +++ b/primertrim/matcher.py @@ -4,18 +4,23 @@ import os.path from .dna import ( - AMBIGUOUS_BASES_COMPLEMENT, deambiguate, reverse_complement, - replace_with_n, partial_seqs_left, partial_seqs_right, + AMBIGUOUS_BASES_COMPLEMENT, + deambiguate, + reverse_complement, + replace_with_n, + partial_seqs_left, + partial_seqs_right, ) from .align import VsearchAligner PrimerMatch = collections.namedtuple( - "PrimerMatch", ["method", "start", "mismatches", "primerseq"]) + "PrimerMatch", ["method", "start", "mismatches", "primerseq"] +) class Matcher(abc.ABC): def __init__(self, queryset, match_reverse_complement=True): - queryset = list(queryset) # We iterate through the queryset twice + queryset = list(queryset) # We iterate through the queryset twice self.queryset = queryset.copy() if match_reverse_complement: for seq in queryset: @@ -43,7 +48,8 @@ def __init__(self, queryset, max_mismatch, match_reverse_complement=True): # "mismatched queryset." possible_mismatches = range(max_mismatch + 1) self.mismatched_queryset = [ - self._mismatched_queries(n) for n in possible_mismatches] + self._mismatched_queries(n) for n in possible_mismatches + ] def _mismatched_queries(self, n_mismatch): # The generator is provides a sequence for one-time use, but @@ -53,7 +59,7 @@ def _mismatched_queries(self, n_mismatch): def _iter_mismatched_queries(self, n_mismatch): # This algorithm is terrible unless the number of mismatches is very small - assert(n_mismatch in [0, 1, 2, 3]) + assert n_mismatch in [0, 1, 2, 3] for query in self.queryset: idx_sets = itertools.combinations(range(len(query)), n_mismatch) for idx_set in idx_sets: @@ -79,8 +85,7 @@ def find_match(self, seq): if start_idx > -1: end_idx = start_idx + len(query) primerseq = seq[start_idx:end_idx] - return PrimerMatch( - "Complete", start_idx, n_mismatches, primerseq) + return PrimerMatch("Complete", start_idx, n_mismatches, primerseq) class PartialMatcher(Matcher): @@ -99,18 +104,18 @@ def __init__(self, queryset, min_length, match_reverse_complement=True): def find_match(self, seq): for left_partial_query in self.partial_queries_left: if seq.startswith(left_partial_query): - return PrimerMatch("Partial", 0, 0, left_partial_query) + return PrimerMatch("Partial", 0, 0, left_partial_query) for right_partial_query in self.partial_queries_right: - if seq.endswith(right_partial_query): - start_idx = len(seq) - len(right_partial_query) - return PrimerMatch("Partial", start_idx, 0, right_partial_query) + if seq.endswith(right_partial_query): + start_idx = len(seq) - len(right_partial_query) + return PrimerMatch("Partial", start_idx, 0, right_partial_query) class AlignmentMatcher(Matcher): def __init__(self, queryset, alignment_dir, align_id, cores=1): self.queryset = queryset - assert(os.path.exists(alignment_dir)) - assert(os.path.isdir(alignment_dir)) + assert os.path.exists(alignment_dir) + assert os.path.isdir(alignment_dir) self.alignment_dir = alignment_dir self.align_id = align_id self.cores = cores @@ -132,8 +137,8 @@ def find_in_seqs(self, seqs): a = VsearchAligner(subject_fp) hits = a.search( - seqs.items(), query_fp, result_fp, - min_id=self.align_id, threads=self.cores) + seqs.items(), query_fp, result_fp, min_id=self.align_id, threads=self.cores + ) for hit in hits: seq_id = hit["qseqid"] @@ -142,11 +147,10 @@ def find_in_seqs(self, seqs): start_idx = hit["qstart"] - 1 end_idx = hit["qend"] if hit["qstrand"] == "-": - start_idx = hit["qlen"]-hit["qend"] - end_idx = hit["qlen"]-hit["qstart"]+1 - assert(start_idx < end_idx) + start_idx = hit["qlen"] - hit["qend"] + end_idx = hit["qlen"] - hit["qstart"] + 1 + assert start_idx < end_idx seq = seqs[seq_id] primerseq = seq[start_idx:end_idx] - matchobj = PrimerMatch( - "Alignment", start_idx, mismatches, primerseq) + matchobj = PrimerMatch("Alignment", start_idx, mismatches, primerseq) yield seq_id, matchobj diff --git a/primertrim/trimmable_reads.py b/primertrim/trimmable_reads.py index 0af610a..8e1951b 100644 --- a/primertrim/trimmable_reads.py +++ b/primertrim/trimmable_reads.py @@ -7,7 +7,7 @@ def __init__(self, reads): for desc, seq, qual in reads: read_id = get_read_id(desc) if read_id in self.descs: - raise ValueError("Duplicate read ID: {}",format(read_id)) + raise ValueError("Duplicate read ID: {}", format(read_id)) self.descs[read_id] = desc self.seqs[read_id] = seq self.quals[read_id] = qual @@ -32,8 +32,8 @@ def output_reads(self, min_length=0): seq = self.seqs[read_id] qual = self.quals[read_id] if matchobj is not None: - seq = seq[:matchobj.start] - qual = qual[:matchobj.start] + seq = seq[: matchobj.start] + qual = qual[: matchobj.start] if len(seq) >= min_length: yield (desc, seq, qual) @@ -44,13 +44,20 @@ def output_loginfo(self): yield (read_id, "No match", len(seq), None, None) else: yield ( - read_id, matchobj.method, matchobj.start, - matchobj.mismatches, matchobj.primerseq, + read_id, + matchobj.method, + matchobj.start, + matchobj.mismatches, + matchobj.primerseq, ) loginfo_colnames = [ - "read_id", "match_type", "trimmed_length", "mismatches", - "observed_primer"] + "read_id", + "match_type", + "trimmed_length", + "mismatches", + "observed_primer", + ] def parse_fastq(f): diff --git a/setup.py b/setup.py index 47db768..214a831 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,16 @@ from setuptools import setup setup( - name='primertrim', - version='0.0.2', - description='Trim primer sequences from FASTQ files', - author='PennCHOP Microbiome Program', - author_email='BITTINGERK@chop.edu', - url='https://github.com/PennChopMicrobiomeProgram/primertrim', - packages=['primertrim'], - entry_points = { - 'console_scripts': [ - 'ptrim=primertrim.command:main', + name="primertrim", + version="0.0.2", + description="Trim primer sequences from FASTQ files", + author="PennCHOP Microbiome Program", + author_email="BITTINGERK@chop.edu", + url="https://github.com/PennChopMicrobiomeProgram/primertrim", + packages=["primertrim"], + entry_points={ + "console_scripts": [ + "ptrim=primertrim.command:main", ], - } + }, ) diff --git a/tests/test_command.py b/tests/test_command.py index a0e2b52..fea87c1 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -18,13 +18,19 @@ def test_main_script(tmp_path): log_fp = str(tmp_path / "out.log") args = [ "GCATCGATGAAGAACGCAGC", - "-i", input_fp, - "-o", output_fp, - "--log", log_fp, - "--mismatches", "0", - "--min-partial", "100", + "-i", + input_fp, + "-o", + output_fp, + "--log", + log_fp, + "--mismatches", + "0", + "--min-partial", + "100", "--alignment", - "--align_id", "0.7" + "--align_id", + "0.7", ] main(args) @@ -32,4 +38,4 @@ def test_main_script(tmp_path): assert read_from(log_fp) == read_from(expected_log_fp) expected_output_fp = str(DATA_DIR / "trimmed_example.fastq") - assert read_from(output_fp) == read_from(expected_output_fp) \ No newline at end of file + assert read_from(output_fp) == read_from(expected_output_fp) diff --git a/tests/test_dna.py b/tests/test_dna.py index 408a80c..d84a91e 100644 --- a/tests/test_dna.py +++ b/tests/test_dna.py @@ -1,17 +1,17 @@ from primertrim.dna import ( - partial_seqs_left, partial_seqs_right, + partial_seqs_left, + partial_seqs_right, ) + def test_partial_seqs_left(): - assert list(partial_seqs_left("ABCDEFG", 3)) == \ - ["BCDEFG", "CDEFG", "DEFG", "EFG"] + assert list(partial_seqs_left("ABCDEFG", 3)) == ["BCDEFG", "CDEFG", "DEFG", "EFG"] def test_partial_seqs_right(): - assert list(partial_seqs_right("ABCDEFG", 3)) == \ - ["ABCDEF", "ABCDE", "ABCD", "ABC"] + assert list(partial_seqs_right("ABCDEFG", 3)) == ["ABCDEF", "ABCDE", "ABCD", "ABC"] def test_partial_no_results(): - assert list(partial_seqs_left("ABCDE", 5)) == [] - assert list(partial_seqs_right("ABCDE", 5)) == [] + assert list(partial_seqs_left("ABCDE", 5)) == [] + assert list(partial_seqs_right("ABCDE", 5)) == [] diff --git a/tests/test_matcher.py b/tests/test_matcher.py index f2405fb..f73825c 100644 --- a/tests/test_matcher.py +++ b/tests/test_matcher.py @@ -1,15 +1,18 @@ from primertrim.matcher import ( PrimerMatch, - CompleteMatcher, PartialMatcher, AlignmentMatcher, + CompleteMatcher, + PartialMatcher, + AlignmentMatcher, ) import os + def test_complete_match(): m = CompleteMatcher(["TTTTTT"], 1, False) assert m.find_match("AGATTTTTT") == PrimerMatch("Complete", 3, 0, "TTTTTT") assert m.find_match("AATTTGTT") == PrimerMatch("Complete", 2, 1, "TTTGTT") - assert m.find_match("AATTGGTT") == None # Two mismatches is too much + assert m.find_match("AATTGGTT") == None # Two mismatches is too much def test_partial_match(): @@ -17,19 +20,24 @@ def test_partial_match(): assert m.find_match("AAAAAGTCGT") == PrimerMatch("Partial", 0, 0, "AAAAA") assert m.find_match("AAAAGTCGT") == PrimerMatch("Partial", 0, 0, "AAAA") assert m.find_match("GTCGAAAAA") == PrimerMatch("Partial", 4, 0, "AAAAA") - assert m.find_match("AAAGTCGGCT") == None # Length is 3, no match + assert m.find_match("AAAGTCGGCT") == None # Length is 3, no match + def test_align_match(tmp_path): align_fp = str(tmp_path / "vsearch_align_fp") os.mkdir(align_fp) - test_dict = {"test": "GGGGGAAAAA", - "test2": "GGGGGAACAA", - "test3": "GGGGGGGAAAAA", - "test4": "GGGGGGGCAAAACCCCCTTTTT"} - p_match = [PrimerMatch("Alignment", 0, 0, "GGGGGAAAAA"), - PrimerMatch("Alignment", 0, 1, "GGGGGAACAA"), - PrimerMatch("Alignment", 2, 0, "GGGGGAAAAA"), - PrimerMatch("Alignment", 2, 1, "GGGGGCAAAACCCCCTTTTT")] + test_dict = { + "test": "GGGGGAAAAA", + "test2": "GGGGGAACAA", + "test3": "GGGGGGGAAAAA", + "test4": "GGGGGGGCAAAACCCCCTTTTT", + } + p_match = [ + PrimerMatch("Alignment", 0, 0, "GGGGGAAAAA"), + PrimerMatch("Alignment", 0, 1, "GGGGGAACAA"), + PrimerMatch("Alignment", 2, 0, "GGGGGAAAAA"), + PrimerMatch("Alignment", 2, 1, "GGGGGCAAAACCCCCTTTTT"), + ] m = AlignmentMatcher(["GGGGGAAAAACCCCCTTTTT"], align_fp, 0.8) m_gen = m.find_in_seqs(test_dict.items()) i = 0 diff --git a/tests/test_remove_primers.py b/tests/test_remove_primers.py index 139597f..8b13789 100644 --- a/tests/test_remove_primers.py +++ b/tests/test_remove_primers.py @@ -1,2 +1 @@ - From 82043c6cd8eae39a071a39dc94f048180e63ed85 Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 10:11:46 -0400 Subject: [PATCH 4/8] Debug --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 07cd464..c520b77 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,6 +38,8 @@ jobs: python -m pip install -e . - name: Unit tests run: | + conda list + vsearch --version pytest -vvl tests/ - name: Other tests if: matrix.python-version == '3.10' From e19597a324e75d7d92adbccee32aaf2ef05dded9 Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 11:12:55 -0400 Subject: [PATCH 5/8] Debug --- README.md | 2 +- primertrim_env.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index ced1232..f2197da 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Detect short primer sequences in FASTQ reads and trim the reads accordingly. ```bash git clone https://github.com/PennChopMicrobiomeProgram/primertrim.git cd primertrim -conda env create -f primertrim_env.yml +conda env create -f primertrim_env.yml -n primertrim_env conda activate primertrim pip install . ``` diff --git a/primertrim_env.yml b/primertrim_env.yml index 34e2aed..3a26c02 100755 --- a/primertrim_env.yml +++ b/primertrim_env.yml @@ -1,4 +1,3 @@ -name: primertrim channels: - bioconda dependencies: From 2c4e45ab67c83532f5c78a60d9083fef120dd234 Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 11:16:32 -0400 Subject: [PATCH 6/8] Remove debug --- .github/workflows/tests.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c520b77..07cd464 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,8 +38,6 @@ jobs: python -m pip install -e . - name: Unit tests run: | - conda list - vsearch --version pytest -vvl tests/ - name: Other tests if: matrix.python-version == '3.10' From b46f4d7c0e8c2f6a8d1ad0dfb184775f978e1db5 Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 11:38:16 -0400 Subject: [PATCH 7/8] Add badges --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index f2197da..ee30cea 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,17 @@ # Primer trim + +[![Tests](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/tests.yml/badge.svg)](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/tests.yml) +[![CodeCov](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/codecov.yml/badge.svg)](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/codecov.yml) +[![Super-Linter](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/linter.yml/badge.svg)](https://github.com/PennChopMicrobiomeProgram/primertrim/actions/workflows/linter.yml) + + Detect short primer sequences in FASTQ reads and trim the reads accordingly. ## Installation +Primertrim requires vsearch, our recommended method of installation is through conda and is shown here: + ```bash git clone https://github.com/PennChopMicrobiomeProgram/primertrim.git cd primertrim From fc2bb4afe244f097b9182019ab664c3648c0ec02 Mon Sep 17 00:00:00 2001 From: ctbus Date: Wed, 12 Apr 2023 11:46:54 -0400 Subject: [PATCH 8/8] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ee30cea..2deba16 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Primertrim requires vsearch, our recommended method of installation is through c ```bash git clone https://github.com/PennChopMicrobiomeProgram/primertrim.git cd primertrim -conda env create -f primertrim_env.yml -n primertrim_env +conda env create -f primertrim_env.yml -n primertrim conda activate primertrim pip install . ```