From 2f69e7748a02747987657e5fe8e84fc80ed9de52 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Fri, 1 Oct 2021 11:44:45 +0100 Subject: [PATCH] Change the default extension behaviour to `--noextend` Following discussion in Issue #342. --- pyani/anim.py | 22 +++++++++++----------- pyani/pyani_orm.py | 14 +++++++------- pyani/scripts/parsers/anim_parser.py | 22 +++++++++++----------- pyani/scripts/subcommands/subcmd_anim.py | 8 ++++---- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pyani/anim.py b/pyani/anim.py index 13672c07..9a19e5e4 100644 --- a/pyani/anim.py +++ b/pyani/anim.py @@ -141,7 +141,7 @@ def generate_nucmer_jobs( nucmer_exe: Path = pyani_config.NUCMER_DEFAULT, filter_exe: Path = pyani_config.FILTER_DEFAULT, maxmatch: bool = False, - extend: bool = False, + noextend: bool = False, jobprefix: str = "ANINUCmer", ): """Return list of Jobs describing NUCmer command-lines for ANIm. @@ -151,14 +151,14 @@ def generate_nucmer_jobs( :param nucmer_exe: str, location of the nucmer binary :param filter_exe: :param maxmatch: Boolean flag indicating to use NUCmer's -maxmatch option - :param extend: Boolean flag indicating whether to use NUCmer's --(no)extend (False: --noextend; True: --extend) + :param noextend: Boolean flag indicating whether to use NUCmer's --(no)extend (True: --noextend; False: --extend) :param jobprefix: Loop over all FASTA files, generating Jobs describing NUCmer command lines for each pairwise comparison. """ ncmds, fcmds = generate_nucmer_commands( - filenames, outdir, nucmer_exe, filter_exe, maxmatch, extend + filenames, outdir, nucmer_exe, filter_exe, maxmatch, noextend ) joblist = [] for idx, ncmd in enumerate(ncmds): @@ -177,7 +177,7 @@ def generate_nucmer_commands( nucmer_exe: Path = pyani_config.NUCMER_DEFAULT, filter_exe: Path = pyani_config.FILTER_DEFAULT, maxmatch: bool = False, - extend: bool = False, + noextend: bool = False, ) -> Tuple[List, List]: """Return list of NUCmer command-lines for ANIm. 
@@ -185,7 +185,7 @@ def generate_nucmer_commands( :param outdir: path to output directory :param nucmer_exe: location of the nucmer binary :param maxmatch: Boolean flag indicating to use NUCmer's -maxmatch option - :param extend: Boolean flag indicating whether to use NUCmer's --(no)extend (False: --noextend; True: --extend) + :param noextend: Boolean flag indicating whether to use NUCmer's --(no)extend (True: --noextend; False: --extend) The first element returned is a list of NUCmer commands, and the second a corresponding list of delta_filter_wrapper.py commands. @@ -202,7 +202,7 @@ def generate_nucmer_commands( for idx, fname1 in enumerate(filenames[:-1]): for fname2 in filenames[idx + 1 :]: ncmd, dcmd = construct_nucmer_cmdline( - fname1, fname2, outdir, nucmer_exe, filter_exe, maxmatch, extend + fname1, fname2, outdir, nucmer_exe, filter_exe, maxmatch, noextend ) nucmer_cmdlines.append(ncmd) delta_filter_cmdlines.append(dcmd) @@ -218,7 +218,7 @@ def construct_nucmer_cmdline( nucmer_exe: Path = pyani_config.NUCMER_DEFAULT, filter_exe: Path = pyani_config.FILTER_DEFAULT, maxmatch: bool = False, - extend: bool = False, + noextend: bool = False, ) -> Tuple[str, str]: """Return a tuple of corresponding NUCmer and delta-filter commands. @@ -229,7 +229,7 @@ def construct_nucmer_cmdline( :param filter_exe: :param maxmatch: Boolean flag indicating whether to use NUCmer's -maxmatch option. If not, the -mum option is used instead - :param extend: Boolean flag indicating whether to use NUCmer's --(no)extend (False: --noextend; True: --extend) + :param noextend: Boolean flag indicating whether to use NUCmer's --(no)extend (True: --noextend; False: --extend) The split into a tuple was made necessary by changes to SGE/OGE. 
The delta-filter command must now be run as a dependency of the NUCmer @@ -252,10 +252,10 @@ def construct_nucmer_cmdline( mode = "--maxmatch" else: mode = "--mum" - if extend: - ext = " --extend" - else: + if noextend: ext = " --noextend" + else: + ext = " --extend" nucmercmd = f"{nucmer_exe} {mode} {ext} -p {outprefix} {fname1} {fname2}" # There's a subtle pathlib.Path issue, here. We must use string concatenation to add suffixes # to the outprefix files, as using path.with_suffix() instead can replace part of the filestem diff --git a/pyani/pyani_orm.py b/pyani/pyani_orm.py index a7e2ded1..eb2bcf06 100644 --- a/pyani/pyani_orm.py +++ b/pyani/pyani_orm.py @@ -274,7 +274,7 @@ class Comparison(Base): "version", "fragsize", "maxmatch", - "extend", + "noextend", ), ) @@ -290,7 +290,7 @@ class Comparison(Base): version = Column(String) fragsize = Column(Integer) maxmatch = Column(Boolean) - extend = Column(Boolean) + noextend = Column(Boolean) query = relationship( "Genome", foreign_keys=[query_id], back_populates="query_comparisons" @@ -344,7 +344,7 @@ def get_comparison_dict(session: Any) -> Dict[Tuple, Any]: :param session: live SQLAlchemy session of pyani database Returns Comparison objects, keyed by (_.query_id, _.subject_id, - _.program, _.version, _.fragsize, _.maxmatch, _.extend) tuple + _.program, _.version, _.fragsize, _.maxmatch, _.noextend) tuple """ return { ( @@ -354,7 +354,7 @@ def get_comparison_dict(session: Any) -> Dict[Tuple, Any]: _.version, _.fragsize, _.maxmatch, - _.extend, + _.noextend, ): _ for _ in session.query(Comparison).all() } @@ -416,7 +416,7 @@ def filter_existing_comparisons( version, fragsize: Optional[int] = None, maxmatch: Optional[bool] = False, - extend: Optional[bool] = False, + noextend: Optional[bool] = False, ) -> List: """Filter list of (Genome, Genome) comparisons for those not in the session db. 
@@ -427,7 +427,7 @@ def filter_existing_comparisons( :param version: version of program for comparison :param fragsize: fragment size for BLAST databases :param maxmatch: maxmatch used with nucmer comparison - :param extend: extend used with nucmer comparison + :param noextend: noextend used with nucmer comparison When passed a list of (Genome, Genome) comparisons as comparisons, check whether the comparison exists in the database and, if so, associate it with the passed run. @@ -448,7 +448,7 @@ def filter_existing_comparisons( version, fragsize, maxmatch, - extend, + noextend, ) ] ) diff --git a/pyani/scripts/parsers/anim_parser.py b/pyani/scripts/parsers/anim_parser.py index 1bf27258..291b2bd0 100644 --- a/pyani/scripts/parsers/anim_parser.py +++ b/pyani/scripts/parsers/anim_parser.py @@ -111,18 +111,18 @@ def build( default=False, help="override MUMmer to allow all NUCmer matches", ) - extend = parser.add_mutually_exclusive_group() - extend.add_argument( - "--extend", - dest="extend", - action="store_true", - default=False, - help="override default to allow overlapping NUCmer matches", - ) - extend.add_argument( + # extend = parser.add_mutually_exclusive_group() + # extend.add_argument( + # "--extend", + # dest="extend", + # action="store_true", + # default=True, + # help="override default to allow overlapping NUCmer matches", + # ) + parser.add_argument( "--noextend", - dest="extend", - action="store_false", + dest="noextend", + action="store_true", default=False, help="disallow overlapping NUCmer matches", ) diff --git a/pyani/scripts/subcommands/subcmd_anim.py b/pyani/scripts/subcommands/subcmd_anim.py index b0d8a1e1..2fc07a06 100644 --- a/pyani/scripts/subcommands/subcmd_anim.py +++ b/pyani/scripts/subcommands/subcmd_anim.py @@ -128,7 +128,7 @@ class ProgParams(NamedTuple): fragsize: str maxmatch: bool - extend: bool + noextend: bool def subcmd_anim(args: Namespace) -> None: @@ -252,7 +252,7 @@ def subcmd_anim(args: Namespace) -> None: nucmer_version, None, 
args.maxmatch, - args.extend, + args.noextend, ) logger.info( "\t...after check, still need to run %s comparisons", len(comparisons_to_run) @@ -338,7 +338,7 @@ def generate_joblist( args.nucmer_exe, args.filter_exe, args.maxmatch, - args.extend, + args.noextend, ) logger.debug("Commands to run:\n\t%s\n\t%s", ncmd, dcmd) outprefix = ncmd.split()[4] # prefix for NUCmer output @@ -447,7 +447,7 @@ def update_comparison_results( version=nucmer_version, fragsize=None, maxmatch=args.maxmatch, - extend=args.extend, + noextend=args.noextend, ) )