From b7ceab8473160070507cbb7c9bf6da1e78e4b1a3 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Tue, 6 Aug 2024 15:08:29 -0400 Subject: [PATCH] Adjust tests for vsearch --- .tests/e2e/test_full_run.py | 43 ++++++------------------------------- README.md | 6 ++++-- config.yml | 2 +- sbx_vsearch.smk | 2 +- 4 files changed, 13 insertions(+), 40 deletions(-) diff --git a/.tests/e2e/test_full_run.py b/.tests/e2e/test_full_run.py index aeb0d2e..d51d976 100644 --- a/.tests/e2e/test_full_run.py +++ b/.tests/e2e/test_full_run.py @@ -9,8 +9,7 @@ @pytest.fixture def setup(tmpdir): reads_fp = Path(".tests/data/reads/").resolve() - hosts_fp = Path(".tests/data/hosts/").resolve() - db_fp = Path(".tests/data/db/").resolve() + db_fp = Path(".tests/data/ref/").resolve() project_dir = tmpdir / "project" @@ -18,20 +17,7 @@ def setup(tmpdir): config_fp = project_dir / "sunbeam_config.yml" - config_str = f"sbx_kraken: {{kraken_db_fp: {db_fp}}}" - sp.check_output( - [ - "sunbeam", - "config", - "modify", - "-i", - "-s", - f"{config_str}", - f"{config_fp}", - ] - ) - - config_str = f"qc: {{host_fp: {hosts_fp}}}" + config_str = f"sbx_vsearch: {{db_fp: {db_fp}}}" sp.check_output( [ "sunbeam", @@ -63,7 +49,7 @@ def run_sunbeam(setup): "conda", "--profile", project_dir, - "all_classify", + "all_vsearch", "--directory", tmpdir, ] @@ -84,24 +70,9 @@ def run_sunbeam(setup): def test_full_run(run_sunbeam): output_fp, benchmarks_fp = run_sunbeam - all_samples_fp = output_fp / "classify" / "kraken" / "all_samples.tsv" + long_report_fp = output_fp / "mapping" / "vsearch" / "LONG_report.tsv" + long_fasta_fp = output_fp / "mapping" / "vsearch" / "LONG.fasta" # Check output - assert all_samples_fp.exists() - - with open(all_samples_fp) as f: - header_line = f.readline() - print(f"Header line: {header_line}") - assert "TEST-taxa" in header_line - assert "EMPTY-taxa" in header_line - assert "Consensus Lineage" in header_line - test_index = header_line.split("\t").index("TEST-taxa") - empty_index = header_line.split("\t").index("EMPTY-taxa") - - lines = f.readlines() - print(lines) - for line in lines: - if line[0] == "2": - fields = line.split("\t") - assert int(fields[empty_index]) == 0 - assert int(fields[test_index]) > 0 + assert long_report_fp.exists() + assert long_fasta_fp.exists() diff --git a/README.md b/README.md index 65a698d..7aa5ca3 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ # sbx_vsearch - +[![Tests](https://github.com/sunbeam-labs/sbx_vsearch/actions/workflows/pr.yml/badge.svg)](https://github.com/sunbeam-labs/sbx_vsearch/actions/workflows/pr.yml) +[![Release](https://img.shields.io/github/release/sunbeam-labs/sbx_vsearch.svg?style=flat)](https://github.com/sunbeam-labs/sbx_vsearch/releases/latest) +[![DockerHub](https://img.shields.io/docker/pulls/sunbeamlabs/sbx_vsearch)](https://hub.docker.com/repository/docker/sunbeamlabs/sbx_vsearch/) A [Sunbeam](https://github.com/sunbeam-labs/sunbeam) extension for using [Vsearch](https://github.com/torognes/vsearch) with the `--usearch-global` option to do alignment of reads to any fasta file. @@ -27,7 +29,7 @@ N.B. For sunbeam versions <4 the last command will be something like `sunbeam ru ## Configuration - - db: Is the filepath to a directory containing reference fasta files + - db_fp: Is the filepath to a directory containing reference fasta files - threads: Is the number of threads to use while running vsearch - min_id: Is the minimum identity for query-target match - weak_id: Is the set lower than min-id and you will get some weaker matches too diff --git a/config.yml b/config.yml index 3a2356c..3e342fb 100644 --- a/config.yml +++ b/config.yml @@ -1,5 +1,5 @@ sbx_vsearch: - db: "" + db_fp: "" threads: 4 min_id: 0.85 #minimum identity for query-target match weak_id: 0.85 #set lower than min-id and you will get some weaker matches too diff --git a/sbx_vsearch.smk b/sbx_vsearch.smk index 195dd5f..3bc41f1 100644 --- a/sbx_vsearch.smk +++ b/sbx_vsearch.smk @@ -46,7 +46,7 @@ rule fq_2_fa: rule run_vsearch: input: query=str(MAPPING_FP / "R1" / "{sample}_1.fasta"), - db=str(Cfg["sbx_vsearch"]["db"]), + db=str(Cfg["sbx_vsearch"]["db_fp"]), output: reports=str(MAPPING_FP / "vsearch" / "{sample}_report.tsv"), alignments=str(MAPPING_FP / "vsearch" / "{sample}.fasta"),