Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve usage and meteor downloader #36

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions meteor/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,17 @@
from time import time
import tarfile
import json
from typing import ClassVar
import sys


@dataclass
class Downloader(Session):
"""Download and prepare catalogues"""

CONFIG_DATA_FILE: ClassVar[Path] = Path("data/zenodo.json")
TEST_CATALOGUE: ClassVar[str] = "test"

meteor: type[Component]
choice: str
taxonomy: bool
Expand All @@ -36,14 +41,26 @@
catalogues_config: dict = field(default_factory=dict)
start_time: float = field(default_factory=float)

def __post_init__(self) -> None:
@staticmethod
def load_catalogues_config() -> dict:
try:
config_data = importlib.resources.files("meteor") / "data/zenodo.json"
config_data = importlib.resources.files("meteor") / str(Downloader.CONFIG_DATA_FILE)
with importlib.resources.as_file(config_data) as configuration_path:
with configuration_path.open("rt", encoding="UTF-8") as config:
self.catalogues_config = json.load(config)
except AssertionError:
logging.error("The file zenodo.json is missing in meteor source")
return json.load(config)
except FileNotFoundError:
logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name)
sys.exit(1)

Check warning on line 53 in meteor/downloader.py

View check run for this annotation

Codecov / codecov/patch

meteor/downloader.py#L51-L53

Added lines #L51 - L53 were not covered by tests

@staticmethod
def get_available_catalogues() -> list[str]:
    """Return the catalogue names that can be selected for download.

    The names are the top-level keys of the configuration returned by
    ``Downloader.load_catalogues_config()``, minus the entry named by
    ``Downloader.TEST_CATALOGUE``.

    Returns:
        Catalogue names, in the order they appear in the configuration.
    """
    catalogues_config = Downloader.load_catalogues_config()
    # Filter rather than list.remove(): remove() raises ValueError when the
    # test entry is absent from the configuration, which would abort the
    # caller instead of simply returning every configured catalogue.
    return [
        name
        for name in catalogues_config
        if name != Downloader.TEST_CATALOGUE
    ]

Check warning on line 60 in meteor/downloader.py

View check run for this annotation

Codecov / codecov/patch

meteor/downloader.py#L57-L60

Added lines #L57 - L60 were not covered by tests

def __post_init__(self) -> None:
self.catalogues_config = Downloader.load_catalogues_config()
self.meteor.ref_dir.mkdir(exist_ok=True, parents=True)
if self.taxonomy:
self.data_type = "taxonomy_info"
Expand Down Expand Up @@ -111,7 +128,7 @@
)
urlretrieve(url, filename=catalogue, reporthook=self.show_progress)
print(flush=True)
if self.choice == "test":
if self.choice == Downloader.TEST_CATALOGUE:
for sample in self.catalogues_config[self.choice]["samples"]:
logging.info(f"Download {sample} fastq file")
url_fastq = self.catalogues_config[self.choice]["samples"][sample][
Expand Down Expand Up @@ -140,3 +157,4 @@
)
except AssertionError:
logging.error("MD5sum of %s has a different value than expected", catalogue)
sys.exit(1)

Check warning on line 160 in meteor/downloader.py

View check run for this annotation

Codecov / codecov/patch

meteor/downloader.py#L160

Added line #L160 was not covered by tests
46 changes: 17 additions & 29 deletions meteor/meteor.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,7 @@ def get_arguments() -> Namespace: # pragma: no cover
dest="user_choice",
type=str,
required=True,
choices=[
"cat_gut",
"chicken_caecal",
"dog_gut",
"human_gut",
"human_oral",
"human_skin",
"mouse_gut",
"rabbit_gut",
"rat_gut",
"pig_gut",
],
choices=Downloader.get_available_catalogues(),
help="Select the catalogue to download.",
)
download_parser.add_argument(
Expand Down Expand Up @@ -231,7 +220,6 @@ def get_arguments() -> Namespace: # pragma: no cover
fastq_parser.add_argument(
"-p",
dest="ispaired",
default=False,
action="store_true",
help="Fastq files are paired.",
)
Expand Down Expand Up @@ -542,10 +530,10 @@ def get_arguments() -> Namespace: # pragma: no cover
dest="max_depth",
default=100,
type=int,
help="Maximum depth taken in account (default 100).",
help="Maximum depth taken in account (default: %(default)d).",
)
strain_parser.add_argument(
"-t", dest="threads", default=1, type=int, help="Threads count."
"-t", dest="threads", default=1, type=num_threads, help="Threads count."
)
# strain_parser.add_argument(
# "-c",
Expand All @@ -561,7 +549,7 @@ def get_arguments() -> Namespace: # pragma: no cover
choices=range(1, 10000),
metavar="MIN_SNP_DEPTH",
type=int,
help="""Minimum snp depth (default >=3).
help="""Minimum snp depth (default: >= %(default)d).
Values should be comprised between 1 and the maximum depth
(10000 reads are taken in account).""",
)
Expand All @@ -570,7 +558,7 @@ def get_arguments() -> Namespace: # pragma: no cover
dest="min_frequency_non_reference",
default=0.8,
type=isborned01,
help="Minimum frequency for non reference allele (default >=0.8).",
help="Minimum frequency for non reference allele (default: >= %(default).1f).",
)
strain_parser.add_argument(
"-m",
Expand All @@ -579,7 +567,7 @@ def get_arguments() -> Namespace: # pragma: no cover
choices=range(1, 101),
metavar="MIN_MSP_COVERAGE",
type=int,
help="""Minimum number of genes from the MSP that are covered (default >=50).
help="""Minimum number of genes from the MSP that are covered (default: >= %(default)d).
Values should be comprised between 1 and 100
(maximum number of core genes taken in account).""",
)
Expand All @@ -588,7 +576,7 @@ def get_arguments() -> Namespace: # pragma: no cover
dest="min_gene_coverage",
default=0.8,
type=isborned01,
help="Minimum gene coverage from 0 to 1 (default >=0.5).",
help="Minimum gene coverage from 0 to 1 (default: >= %(default).1f).",
)
strain_parser.add_argument(
"-o",
Expand All @@ -601,7 +589,7 @@ def get_arguments() -> Namespace: # pragma: no cover
"--kc",
dest="keep_consensus",
action="store_true",
help="Keep consensus marker genes (default False, set to True to recompute strain)",
help="Keep consensus marker genes (default: False, set to True to recompute strain)",
)
strain_parser.add_argument(
"--tmp",
Expand All @@ -624,36 +612,36 @@ def get_arguments() -> Namespace: # pragma: no cover
dest="max_gap",
default=0.5,
type=isborned01,
help="Removes sites constitued of >= cutoff gap character (default >=0.5).",
help="Removes sites constitued of >= cutoff gap character (default: >= %(default).1f).",
)
tree_parser.add_argument(
"-c",
dest="gap_char",
default="-",
type=str,
help="Gap character (default -).",
help="Gap character (default: %(default)s).",
)
tree_parser.add_argument(
"-f",
dest="format",
default=None,
choices=["png", "svg", "pdf", "txt"],
type=str,
help="Output image format (default txt).",
help="Output image format (default: %(default)s).",
)
tree_parser.add_argument(
"-w",
dest="width",
default=500,
type=int,
help="Output image width (default 500px).",
help="Output image width (default: %(default)dpx).",
)
tree_parser.add_argument(
"-H",
dest="height",
default=500,
type=int,
help="Output image height (default 500px).",
help="Output image height (default: %(default)dpx).",
)
tree_parser.add_argument(
"-o",
Expand All @@ -663,7 +651,7 @@ def get_arguments() -> Namespace: # pragma: no cover
help="Path to output directory.",
)
tree_parser.add_argument(
"-t", dest="threads", default=1, type=int, help="Threads count."
"-t", dest="threads", default=num_threads, type=int, help="Threads count."
)
tree_parser.add_argument(
"--tmp",
Expand All @@ -679,10 +667,10 @@ def main() -> None: # pragma: no cover
"""
Main program function
"""
# Get arguments
args = get_arguments()
# Let us logging
logger = get_logging()
# Get arguments
args = get_arguments()
# version = importlib.metadata.version("meteor")
# print("Meteor version", version)
# Create a meteor dataset
Expand Down Expand Up @@ -809,7 +797,7 @@ def main() -> None: # pragma: no cover
meteor.tmp_dir = Path(tmpdirname)
meteor.mapping_dir = Path(tmpdirname) / "map"
meteor.fastq_dir = Path(tmpdirname)
downloader = Downloader(meteor, "test", False, True)
downloader = Downloader(meteor, Downloader.TEST_CATALOGUE, False, True)
downloader.execute()
fastq_importer = FastqImporter(meteor, meteor.tmp_dir, False, None)
fastq_importer.execute()
Expand Down
3 changes: 2 additions & 1 deletion meteor/treebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import sys
import pandas as pd
import lzma

from shutil import rmtree

Check warning on line 26 in meteor/treebuilder.py

View check run for this annotation

Codecov / codecov/patch

meteor/treebuilder.py#L26

Added line #L26 was not covered by tests

@dataclass
class TreeBuilder(Session):
Expand Down Expand Up @@ -121,3 +121,4 @@
)
except ete3.parser.newick.NewickError:
logging.info("Not sufficient info in %s.", str(tree_file.resolve()))
rmtree(self.meteor.tmp_dir, ignore_errors=True)

Check warning on line 124 in meteor/treebuilder.py

View check run for this annotation

Codecov / codecov/patch

meteor/treebuilder.py#L124

Added line #L124 was not covered by tests
Loading