From d9a47454b83ec3ed3e7b693b49e6ab97b7a961b4 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Wed, 27 Nov 2024 17:24:48 +0100 Subject: [PATCH] remove default config file to make all settings explicit (#287) A default config file `nplinker_default.toml` was used automatically in NPLinker. However, it may lead to two problems: 1. The use of default settings causes confusion for users who might not be aware of them. 2. It requires users to be familiar with Dynaconf to configure their own settings. This PR removes the default config file and lets users configure all settings in their config file `nplinker.toml`. --- docs/concepts/config_file.md | 8 +- docs/quickstart.md | 38 ++++++-- src/nplinker/config.py | 12 +-- src/nplinker/data/nplinker.toml | 91 +++++++++++-------- src/nplinker/loader.py | 22 ++--- src/nplinker/nplinker_default.toml | 17 ---- .../integration/data/nplinker_local_mode.toml | 14 ++- tests/unit/data/nplinker_local_mode.toml | 14 ++- tests/unit/test_config.py | 7 +- 9 files changed, 123 insertions(+), 100 deletions(-) delete mode 100644 src/nplinker/nplinker_default.toml diff --git a/docs/concepts/config_file.md b/docs/concepts/config_file.md index d7e8f4315..e2f74ffc6 100644 --- a/docs/concepts/config_file.md +++ b/docs/concepts/config_file.md @@ -4,13 +4,9 @@ --8<-- "src/nplinker/data/nplinker.toml" ``` +## Example Configuration -## Default Configurations -The default configurations are automatically used by NPLinker if you don't set them in your config file. - -```toml ---8<-- "src/nplinker/nplinker_default.toml" -``` +For a full example of a configuration file, see [here](../quickstart.md#3-prepare-config-file). ## Config loader diff --git a/docs/quickstart.md b/docs/quickstart.md index 1d918c83a..de215f038 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -166,16 +166,27 @@ is recommended to put it in the working directory created in step 2. The details of all settings can be found at this page [Config File](./concepts/config_file.md). 
-To keep it simple, [default settings](./concepts/config_file.md#default-configurations) will be used -automatically by NPLinker if you don't set them in your `nplinker.toml` config file. - -What you need to do is to set the `root_dir` and `mode` in the `nplinker.toml` file. +Here are some example values for the `nplinker.toml` file: === "`local` mode" ```toml title="nplinker.toml" root_dir = "absolute/path/to/working/directory" # (1)! mode = "local" - # and other settings you want to override the default settings + + [log] + level = "DEBUG" + use_console = true + + [mibig] + to_use = true + version = "3.1" + + [bigscape] + version = 1 + cutoff = "0.30" + + [scoring] + methods = ["metcalf"] ``` 1. Replace `absolute/path/to/working/directory` with the **absolute** path to the working directory @@ -187,7 +198,22 @@ What you need to do is to set the `root_dir` and `mode` in the `nplinker.toml` f root_dir = "absolute/path/to/working/directory" # (1)! mode = "podp" podp_id = "podp_id" # (2)! - # and other settings you want to override the default settings + + [log] + level = "DEBUG" + use_console = true + + [mibig] + to_use = true + version = "3.1" + + [bigscape] + version = 2 + cutoff = "0.30" + parameters = "--mibig_version 3.1 --include_singletons --gcf_cutoffs 0.30" + + [scoring] + methods = ["metcalf"] ``` 1. 
Replace `absolute/path/to/working/directory` with the **absolute** path to the working directory diff --git a/src/nplinker/config.py b/src/nplinker/config.py index d1ade8bae..a9491799f 100644 --- a/src/nplinker/config.py +++ b/src/nplinker/config.py @@ -1,6 +1,5 @@ from __future__ import annotations from os import PathLike -from pathlib import Path from dynaconf import Dynaconf from dynaconf import Validator from nplinker.utils import transform_to_full_path @@ -25,11 +24,8 @@ def load_config(config_file: str | PathLike) -> Dynaconf: if not config_file.exists(): raise FileNotFoundError(f"Config file '{config_file}' not found") - # Locate the default config file - default_config_file = Path(__file__).resolve().parent / "nplinker_default.toml" - # Load config files - config = Dynaconf(settings_files=[config_file], preload=[default_config_file]) + config = Dynaconf(settings_files=[config_file]) # Validate configs config.validators.register(*CONFIG_VALIDATORS) @@ -61,7 +57,7 @@ def load_config(config_file: str | PathLike) -> Dynaconf: is_in=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], ), Validator("log.file", is_type_of=str), - Validator("log.use_console", is_type_of=bool), + Validator("log.use_console", required=True, is_type_of=bool), # Mibig Validator("mibig.to_use", required=True, is_type_of=bool), Validator( @@ -71,9 +67,9 @@ def load_config(config_file: str | PathLike) -> Dynaconf: when=Validator("mibig.to_use", eq=True), ), # BigScape - Validator("bigscape.parameters", required=True, is_type_of=str), + Validator("bigscape.parameters", is_type_of=str), Validator("bigscape.cutoff", required=True, is_type_of=str), - Validator("bigscape.version", required=True, is_type_of=int), + Validator("bigscape.version", required=True, is_type_of=int, is_in=[1, 2]), # Scoring ## `scoring.methods` must be a list of strings and must contain at least one of the ## supported scoring methods. 
diff --git a/src/nplinker/data/nplinker.toml b/src/nplinker/data/nplinker.toml index 6a9f0d8da..f069de474 100644 --- a/src/nplinker/data/nplinker.toml +++ b/src/nplinker/data/nplinker.toml @@ -2,76 +2,87 @@ # NPLinker configuration file ############################# -# The root directory of the NPLinker project. You need to create it first. -# The value is required and must be a full path. root_dir = "" +# [REQUIRED] The value is required and must be a full path. +# The root directory of the NPLinker project. You need to create it first. + +mode = "podp" +# [REQUIRED] Available values are "podp" and "local". # The mode for preparing dataset. -# The available modes are "podp" and "local". # "podp" mode is for using the PODP platform (https://pairedomicsdata.bioinformatics.nl/) to prepare the dataset. -# "local" mode is for preparing the dataset locally. So uers do not need to upload their data to the PODP platform. -# The value is required. -mode = "podp" -# The PODP project identifier. -# The value is required if the mode is "podp". +# "local" mode is for preparing the dataset locally. So users do not need to upload their data to the PODP platform. + podp_id = "" +# [REQUIRED-UNDER-CONDITIONS] The value is required if the mode is "podp". +# The PODP project identifier. +# Example: The identifier is "4b29ddc3-26d0-40d7-80c5-44fb6631dbf9.4" for the project +# https://pairedomicsdata.bioinformatics.nl/projects/4b29ddc3-26d0-40d7-80c5-44fb6631dbf9.4 [log] -# Log level. The available levels are same as the levels in python package `logging`: -# "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". -# The default value is "INFO". +# Settings for logging. + level = "INFO" +# [REQUIRED] Available values are "NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", and "CRITICAL". +# Log level. + +use_console = true +# [REQUIRED] Available values are `true` and `false`. +# Whether to write log messages to console. + +file = "path/to/logfile" +# [OPTIONAL] # The log file to append log messages. 
-# The value is optional. # If not set or use empty string, log messages will not be written to a file. # The file will be created if it does not exist. Log messages will be appended to the file if it exists. -file = "path/to/logfile" -# Whether to write log meesages to console. -# The default value is true. -use_console = true [mibig] -# Whether to use mibig metadta (json). -# The default value is true. +# Settings for MIBiG. + to_use = true -# The version of mibig metadata. -# Make sure using the same version of mibig in bigscape. -# The default value is "3.1" +# [REQUIRED] Available values are `true` and `false`. +# Whether to use MIBiG annotations/metadata for the analysis. + version = "3.1" +# [REQUIRED-UNDER-CONDITIONS] The value is required if `to_use` is true, and it must be the same as the version of MIBiG used in BiG-SCAPE. +# The version of MIBiG data to use. +# Check all available versions at https://mibig.secondarymetabolites.org/download. [bigscape] -# The parameters to use for running BiG-SCAPE. -# Version of BiG-SCAPE to run. Make sure to change the parameters property below as well -# when changing versions. +# Settings for BiG-SCAPE. + version = 1 -# Required BiG-SCAPE parameters. +# [REQUIRED] Available values are 1 and 2. 1 for version 1.x series and 2 for version 2.x series. +# The version of BiG-SCAPE to use. + +cutoff = "0.30" +# [REQUIRED] The value must be a string. +# Which cutoff to use for the analysis. +# There might be multiple cutoffs in the BiG-SCAPE output and this value must be one of them. + +parameters = "version1_parameters_or_version2_parameters" +# [REQUIRED-UNDER-CONDITIONS] It's required when you want to run BiG-SCAPE in NPLinker. +# Parameters for running BiG-SCAPE. # -------------- # For version 1: # ------------- -# Required parameters are: `--mix`, `--include_singletons` and `--cutoffs`. NPLinker needs them to run the analysis properly. -# Do NOT set these parameters: `--inputdir`, `--outputdir`, `--pfam_dir`. NPLinker will automatically configure them. 
-# If parameter `--mibig` is set, make sure to set the config `mibig.to_use` to true and `mibig.version` to the version of mibig in BiG-SCAPE. -# The default value is "--mibig --clans-off --mix --include_singletons --cutoffs 0.30". +# The parameters MUST contain `--mix`, `--include_singletons` and `--cutoffs`. NPLinker needs them to run the analysis properly. +# The parameters must NOT contain `--inputdir`, `--outputdir`, `--pfam_dir`. NPLinker will automatically configure them. +# An example value could be: "--mibig --clans-off --mix --include_singletons --cutoffs 0.30". # -------------- # For version 2: # -------------- -# Note that BiG-SCAPE v2 has subcommands. NPLinker requires the `cluster` subcommand and its parameters. +# BiG-SCAPE v2 has subcommands. NPLinker requires the `cluster` subcommand and its parameters. # Required parameters of `cluster` subcommand are: `--mibig_version`, `--include_singletons` and `--gcf_cutoffs`. # DO NOT set these parameters: `--pfam_path`, `--inputdir`, `--outputdir`. NPLinker will automatically configure them. # BiG-SCPAPE v2 also runs a `--mix` analysis by default, so you don't need to set this parameter here. -# Example parameters for BiG-SCAPE v2: "--mibig_version 3.1 --include_singletons --gcf_cutoffs 0.30" -parameters = "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" -# Which bigscape cutoff to use for NPLinker analysis. -# There might be multiple cutoffs in bigscape output. -# Note that this value must be a string. -# The default value is "0.30". -cutoff = "0.30" +# An example value could be: "--mibig_version 3.1 --include_singletons --gcf_cutoffs 0.30" [scoring] -# Scoring methods. -# Valid values are "metcalf" and "rosetta". -# The default value is "metcalf". +# Settings for scoring. methods = ["metcalf"] +# [REQUIRED] Available values are "metcalf" and "rosetta". +# Scoring methods to use for the analysis. 
\ No newline at end of file diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index 8d2bcd189..7c0e5eaab 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -206,24 +206,20 @@ def _load_genomics(self): all_bgcs_with_strain = antismash_bgcs_with_strain + self.mibig_bgcs # Step 4: load all GCF objects - bigscape_cluster_file = ( - self.config.root_dir - / defaults.BIGSCAPE_DIRNAME - / f"mix_clustering_c{self.config.bigscape.cutoff}.tsv" - ) - bigscape_db_file = self.config.root_dir / defaults.BIGSCAPE_DIRNAME / "data_sqlite.db" - - # switch depending on found file. prefer V1 if both are found - if bigscape_cluster_file.exists(): + if self.config.bigscape.version == 1: + bigscape_cluster_file = ( + self.config.root_dir + / defaults.BIGSCAPE_DIRNAME + / f"mix_clustering_c{self.config.bigscape.cutoff}.tsv" + ) loader = BigscapeGCFLoader(bigscape_cluster_file) logger.info(f"Loading BigSCAPE cluster file {bigscape_cluster_file}") - elif bigscape_db_file.exists(): + elif self.config.bigscape.version == 2: + bigscape_db_file = self.config.root_dir / defaults.BIGSCAPE_DIRNAME / "data_sqlite.db" loader = BigscapeV2GCFLoader(bigscape_db_file) logger.info(f"Loading BigSCAPE database file {bigscape_db_file}") else: - raise FileNotFoundError( - f"Neither BigSCAPE cluster file {bigscape_cluster_file} nor database file {bigscape_db_file} were found." 
- ) + raise ValueError(f"Unsupported BigScape version: {self.config.bigscape.version}") raw_gcfs = loader.get_gcfs() diff --git a/src/nplinker/nplinker_default.toml b/src/nplinker/nplinker_default.toml deleted file mode 100644 index 390800366..000000000 --- a/src/nplinker/nplinker_default.toml +++ /dev/null @@ -1,17 +0,0 @@ -# NPLinker default configurations - -[log] -level = "INFO" -use_console = true - -[mibig] -to_use = true -version = "3.1" - -[bigscape] -version = 1 -parameters = "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" -cutoff = "0.30" - -[scoring] -methods = ["metcalf"] diff --git a/tests/integration/data/nplinker_local_mode.toml b/tests/integration/data/nplinker_local_mode.toml index 174ee852e..f578c8716 100644 --- a/tests/integration/data/nplinker_local_mode.toml +++ b/tests/integration/data/nplinker_local_mode.toml @@ -1,7 +1,17 @@ -dynaconf_merge = true # merge with the default settings, provided by the Dynaconf library - root_dir = "@format {env[NPLINKER_ROOT_DIR]}" mode = "local" [log] level = "DEBUG" +use_console = true + +[mibig] +to_use = true +version = "3.1" + +[bigscape] +version = 1 +cutoff = "0.30" + +[scoring] +methods = ["metcalf"] \ No newline at end of file diff --git a/tests/unit/data/nplinker_local_mode.toml b/tests/unit/data/nplinker_local_mode.toml index 174ee852e..f578c8716 100644 --- a/tests/unit/data/nplinker_local_mode.toml +++ b/tests/unit/data/nplinker_local_mode.toml @@ -1,7 +1,17 @@ -dynaconf_merge = true # merge with the default settings, provided by the Dynaconf library - root_dir = "@format {env[NPLINKER_ROOT_DIR]}" mode = "local" [log] level = "DEBUG" +use_console = true + +[mibig] +to_use = true +version = "3.1" + +[bigscape] +version = 1 +cutoff = "0.30" + +[scoring] +methods = ["metcalf"] \ No newline at end of file diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index ce2f24b79..23d5fa8ec 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -4,7 +4,7 @@ def 
test_config(tmp_path): - """Test loading the default config file.""" + """Test loading config file.""" os.environ["NPLINKER_ROOT_DIR"] = str(tmp_path) # Create a temporary root dir for NPLinker config = load_config(CONFIG_FILE_LOCAL_MODE) @@ -13,17 +13,12 @@ def test_config(tmp_path): assert config["log.level"] == "DEBUG" assert config.get("log.level") == "DEBUG" - # The following are default values from nplinker_default.toml assert config.get("log.file") is None assert config.log.use_console is True assert config.mibig.to_use is True assert config.mibig.version == "3.1" - assert ( - config.bigscape.parameters - == "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" - ) assert config.bigscape.cutoff == "0.30" assert config.bigscape.version == 1