diff --git a/meteor/downloader.py b/meteor/downloader.py index 4e3b419..de93196 100644 --- a/meteor/downloader.py +++ b/meteor/downloader.py @@ -22,12 +22,17 @@ from time import time import tarfile import json +from typing import ClassVar +import sys @dataclass class Downloader(Session): """Download and prepare catalogues""" + CONFIG_DATA_FILE: ClassVar[Path] = Path("data/zenodo.json") + TEST_CATALOGUE: ClassVar[str] = "test" + meteor: type[Component] choice: str taxonomy: bool @@ -36,14 +41,26 @@ class Downloader(Session): catalogues_config: dict = field(default_factory=dict) start_time: float = field(default_factory=float) - def __post_init__(self) -> None: + @staticmethod + def load_catalogues_config() -> dict: try: - config_data = importlib.resources.files("meteor") / "data/zenodo.json" + config_data = importlib.resources.files("meteor") / Downloader.CONFIG_DATA_FILE.as_posix() with importlib.resources.as_file(config_data) as configuration_path: with configuration_path.open("rt", encoding="UTF-8") as config: - self.catalogues_config = json.load(config) - except AssertionError: - logging.error("The file zenodo.json is missing in meteor source") + return json.load(config) + except FileNotFoundError: + logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name) + sys.exit(1) + + @staticmethod + def get_available_catalogues() -> list[str]: + catalogues_config = Downloader.load_catalogues_config() + return [ + name for name in catalogues_config if name != Downloader.TEST_CATALOGUE + ] + + def __post_init__(self) -> None: + self.catalogues_config = Downloader.load_catalogues_config() self.meteor.ref_dir.mkdir(exist_ok=True, parents=True) if self.taxonomy: self.data_type = "taxonomy_info" @@ -111,7 +128,7 @@ def execute(self) -> None: ) urlretrieve(url, filename=catalogue, reporthook=self.show_progress) print(flush=True) - if self.choice == "test": + if self.choice == 
Downloader.TEST_CATALOGUE: for sample in self.catalogues_config[self.choice]["samples"]: logging.info(f"Download {sample} fastq file") url_fastq = self.catalogues_config[self.choice]["samples"][sample][ @@ -140,3 +157,4 @@ def execute(self) -> None: ) except AssertionError: logging.error("MD5sum of %s has a different value than expected", catalogue) + sys.exit(1) diff --git a/meteor/meteor.py b/meteor/meteor.py index c2079b9..bbb92da 100644 --- a/meteor/meteor.py +++ b/meteor/meteor.py @@ -148,18 +148,7 @@ def get_arguments() -> Namespace: # pragma: no cover dest="user_choice", type=str, required=True, - choices=[ - "cat_gut", - "chicken_caecal", - "dog_gut", - "human_gut", - "human_oral", - "human_skin", - "mouse_gut", - "rabbit_gut", - "rat_gut", - "pig_gut", - ], + choices=Downloader.get_available_catalogues(), help="Select the catalogue to download.", ) download_parser.add_argument( @@ -231,7 +220,6 @@ def get_arguments() -> Namespace: # pragma: no cover fastq_parser.add_argument( "-p", dest="ispaired", - default=False, action="store_true", help="Fastq files are paired.", ) @@ -542,10 +530,10 @@ def get_arguments() -> Namespace: # pragma: no cover dest="max_depth", default=100, type=int, - help="Maximum depth taken in account (default 100).", + help="Maximum depth taken in account (default: %(default)d).", ) strain_parser.add_argument( - "-t", dest="threads", default=1, type=int, help="Threads count." + "-t", dest="threads", default=1, type=num_threads, help="Threads count." ) # strain_parser.add_argument( # "-c", @@ -561,7 +549,7 @@ def get_arguments() -> Namespace: # pragma: no cover choices=range(1, 10000), metavar="MIN_SNP_DEPTH", type=int, - help="""Minimum snp depth (default >=3). + help="""Minimum snp depth (default: >= %(default)d). 
Values should be comprised between 1 and the maximum depth (10000 reads are taken in account).""", ) @@ -570,7 +558,7 @@ def get_arguments() -> Namespace: # pragma: no cover dest="min_frequency_non_reference", default=0.8, type=isborned01, - help="Minimum frequency for non reference allele (default >=0.8).", + help="Minimum frequency for non reference allele (default: >= %(default).1f).", ) strain_parser.add_argument( "-m", @@ -579,7 +567,7 @@ def get_arguments() -> Namespace: # pragma: no cover choices=range(1, 101), metavar="MIN_MSP_COVERAGE", type=int, - help="""Minimum number of genes from the MSP that are covered (default >=50). + help="""Minimum number of genes from the MSP that are covered (default: >= %(default)d). Values should be comprised between 1 and 100 (maximum number of core genes taken in account).""", ) @@ -588,7 +576,7 @@ def get_arguments() -> Namespace: # pragma: no cover dest="min_gene_coverage", default=0.8, type=isborned01, - help="Minimum gene coverage from 0 to 1 (default >=0.5).", + help="Minimum gene coverage from 0 to 1 (default: >= %(default).1f).", ) strain_parser.add_argument( "-o", @@ -601,7 +589,7 @@ def get_arguments() -> Namespace: # pragma: no cover "--kc", dest="keep_consensus", action="store_true", - help="Keep consensus marker genes (default False, set to True to recompute strain)", + help="Keep consensus marker genes (default: False, set to True to recompute strain)", ) strain_parser.add_argument( "--tmp", @@ -624,14 +612,14 @@ def get_arguments() -> Namespace: # pragma: no cover dest="max_gap", default=0.5, type=isborned01, - help="Removes sites constitued of >= cutoff gap character (default >=0.5).", + help="Removes sites constitued of >= cutoff gap character (default: >= %(default).1f).", ) tree_parser.add_argument( "-c", dest="gap_char", default="-", type=str, - help="Gap character (default -).", + help="Gap character (default: %(default)s).", ) tree_parser.add_argument( "-f", @@ -639,21 +627,21 @@ def get_arguments() -> 
Namespace: # pragma: no cover default=None, choices=["png", "svg", "pdf", "txt"], type=str, - help="Output image format (default txt).", + help="Output image format (default: txt).", ) tree_parser.add_argument( "-w", dest="width", default=500, type=int, - help="Output image width (default 500px).", + help="Output image width (default: %(default)dpx).", ) tree_parser.add_argument( "-H", dest="height", default=500, type=int, - help="Output image height (default 500px).", + help="Output image height (default: %(default)dpx).", ) tree_parser.add_argument( "-o", @@ -663,7 +651,7 @@ def get_arguments() -> Namespace: # pragma: no cover help="Path to output directory.", ) tree_parser.add_argument( - "-t", dest="threads", default=1, type=int, help="Threads count." + "-t", dest="threads", default=1, type=num_threads, help="Threads count." ) tree_parser.add_argument( "--tmp", @@ -679,10 +667,10 @@ def main() -> None: # pragma: no cover """ Main program function """ - # Get arguments - args = get_arguments() # Let us logging logger = get_logging() + # Get arguments + args = get_arguments() # version = importlib.metadata.version("meteor") # print("Meteor version", version) # Create a meteor dataset @@ -809,7 +797,7 @@ def main() -> None: # pragma: no cover meteor.tmp_dir = Path(tmpdirname) meteor.mapping_dir = Path(tmpdirname) / "map" meteor.fastq_dir = Path(tmpdirname) - downloader = Downloader(meteor, "test", False, True) + downloader = Downloader(meteor, Downloader.TEST_CATALOGUE, False, True) downloader.execute() fastq_importer = FastqImporter(meteor, meteor.tmp_dir, False, None) fastq_importer.execute() diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py index 2eb742d..41b5cd3 100644 --- a/meteor/treebuilder.py +++ b/meteor/treebuilder.py @@ -23,7 +23,7 @@ import sys import pandas as pd import lzma - +from shutil import rmtree @dataclass class TreeBuilder(Session): @@ -121,3 +121,4 @@ def execute(self) -> None: ) except ete3.parser.newick.NewickError: 
logging.info("Not sufficient info in %s.", str(tree_file.resolve())) + rmtree(self.meteor.tmp_dir, ignore_errors=True) \ No newline at end of file