From 185d9d1fb02c54afc149a39e4cfb2a430a3c78f8 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 15 Jan 2024 21:13:05 -0500 Subject: [PATCH 01/32] feat: add gui code from ccbrpipeliner copied from renee/main.py in ccbrpipeliner repo --- resources/gui.py | 317 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 resources/gui.py diff --git a/resources/gui.py b/resources/gui.py new file mode 100644 index 0000000..987ee14 --- /dev/null +++ b/resources/gui.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +global DEBUG + +DEBUG = True + +import os +import sys +import stat +import subprocess +import glob +import uuid + +# getting the name of the directory +# where the this file is present. +current = os.path.dirname(os.path.realpath(__file__)) + +# Getting the parent directory name +# where the current directory is present. +parent = os.path.dirname(current) + +# adding the parent directory to +# the sys.path. +sys.path.append(parent) +imgdir = os.path.join(parent, "resources", "images") + +# Check if python 3.11 or later is available and running +from src.VersionCheck import version_check + +version_check() + +from src.Utils import * # copy_to_clipboard comes from Utils + +# import pysimplegui +import PySimpleGUI as sg + +global RENEEDIR +global SIFCACHE +global RENEE +global RENEEVER +global RANDOMSTR +global FILES2DELETE +global HOSTNAME + +RENEEDIR = os.getenv("RENEEDIR") +SIFCACHE = os.getenv("SIFCACHE") +RENEEVER = os.getenv("RENEEVER") +HOSTNAME = os.getenv("HOSTNAME") +RENNE = os.path.join(RENEEDIR, RENEEVER, "bin", "renee") +RANDOMSTR = str(uuid.uuid4()) +FILES2DELETE = list() + +# sg.SetOptions(button_color=sg.COLOR_SYSTEM_DEFAULT) + + +def get_combos(): + resource_dir = os.path.join(RENEEDIR, "resources") + if not os.path.exists(resource_dir): + sys.exit("ERROR: Folder does not exist : {}".format(resource_dir)) + searchterm = resource_dir + "/**/**/*json" + jsonfiles = glob.glob(searchterm) + if len(jsonfiles) 
== 0: + sys.exit("ERROR: No Genome+Annotation JSONs found in : {}".format(resource_dir)) + jsons = dict() + for j in jsonfiles: + k = os.path.basename(j) + k = k.replace(".json", "") + jsons[k] = j + return jsons + + +def fixpath(p): + return os.path.abspath(os.path.expanduser(p)) + + +def get_fastqs(inputdir): + inputdir = fixpath(inputdir) + inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") + inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") + inputfastqs.extend(inputfqs) + return inputfastqs + + +def deletefiles(): + for f in FILES2DELETE: + if os.path.exists(f): + os.remove(f) + + +def run(cmd, dry=False): + if dry: + cmd += " --dry-run " + runner_file = os.path.join(os.getenv("HOME"), RANDOMSTR + ".renee.runner") + FILES2DELETE.append(runner_file) + with open(runner_file, "w") as runner: + runner.write(cmd) + st = os.stat(runner_file) + os.chmod(runner_file, st.st_mode | stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) + x = subprocess.run(runner_file, capture_output=True, shell=True, text=True) + run_stdout = x.stdout.encode().decode("utf-8") + run_stderr = x.stderr.encode().decode("utf-8") + return run_stdout, run_stderr + + +def main(): + # get drop down genome+annotation options + jsons = get_combos() + genome_annotation_combinations = list(jsons.keys()) + genome_annotation_combinations.sort() + if DEBUG: + print(jsons) + if DEBUG: + print(genome_annotation_combinations) + + logo = sg.Image(os.path.join(imgdir, "CCBRlogo.png")) + # create layout + layout = [ + [sg.Column([[logo]], justification="center")], + [ + sg.Text( + "RENEE - Rna sEquencing aNalysis pipElinE", font=("Arial", 14, "bold") + ) + ], + [ + sg.Text( + "Input Fastqs folder", font=("Helvetica", 12, "bold"), size=(20, 1) + ), + sg.InputText(key="--INDIR--"), + sg.FolderBrowse(target="--INDIR--"), + ], + [ + sg.Text("Output folder", font=("Helvetica", 12, "bold"), size=(20, 1)), + sg.InputText(key="--OUTDIR--"), + sg.FolderBrowse(target="--OUTDIR--"), + ], + [ + 
sg.Text("Genome+Annotation", font=("Helvetica", 12, "bold"), size=(20, 1)), + sg.Combo( + values=genome_annotation_combinations, + key="--ANNOTATION--", + tooltip="eg. hg38_30 for Genome=hg38 & Gencode_Annotation=version 30", + ), + ], + [ + sg.Submit(key="--SUBMIT--", font=("Helvetica", 12)), + sg.Cancel(key="--CANCEL--", font=("Helvetica", 12)), + sg.Button( + button_text="Documentation", key="--DOC--", font=("Helvetica", 12) + ), + sg.Button(button_text="Help", key="--HELP--", font=("Helvetica", 12)), + ], + ] + if DEBUG: + print("layout is ready!") + + window = sg.Window("RENEE " + RENEEVER, layout, location=(0, 500), finalize=True) + if DEBUG: + print("window created!") + + while True: + event, values = window.read() + if DEBUG: + print(event, values) + # if any((event != 'Submit')): + if event == "--CANCEL--" or event == sg.WIN_CLOSED: + sg.popup_auto_close( + "Thank you for running RENEE. GoodBye!", + location=(0, 500), + title="", + font=("Arial", 12, "bold"), + ) + sys.exit(69) + if event == "--DOC--": + copy_to_clipboard("https://ccbr.github.io/RENEE/") + sg.Popup( + "Visit https://ccbr.github.io/RENEE/ for links to complete documentation. The link has been copied to your clipboard. Please paste it in your favorite web browser.", + font=("Arial", 12, "bold"), + location=(0, 500), + ) + continue + if event == "--HELP--": + copy_to_clipboard("ccbr_pipeliner@mail.nih.gov") + sg.Popup( + "Email ccbr_pipeliner@mail.nih.gov for help. The email id has been copied to your clipboard. 
Please paste it in your emailing software.", + font=("Arial", 12, "bold"), + location=(0, 500), + ) + continue + if event == "--SUBMIT--": + if values["--INDIR--"] == "": + sg.PopupError( + "Input folder must be provided!!", + location=(0, 500), + title="ERROR!", + font=("Arial", 12, "bold"), + ) + continue + elif not os.path.exists(values["--INDIR--"]) and not os.path.exists( + fixpath(values["--INDIR--"]) + ): + if DEBUG: + print(values["--INDIR--"]) + if DEBUG: + print(fixpath(values["--INDIR--"])) + sg.PopupError( + "Input folder doesnt exist!!", + location=(0, 500), + title="ERROR!", + font=("Arial", 12, "bold"), + ) + continue + else: + inputfastqs = get_fastqs(values["--INDIR--"]) + if DEBUG: + print(inputfastqs) + if len(inputfastqs) == 0: + sg.PopupError( + "Input folder has no fastqs!!", + location=(0, 500), + title="ERROR!", + font=("Arial", 12, "bold"), + ) + window.Element("--INDIR--").update("") + continue + if values["--OUTDIR--"] == "": + sg.PopupError( + "Output folder must be provided!!", + location=(0, 500), + title="ERROR", + font=("Arial", 12, "bold"), + ) + continue + elif os.path.exists(values["--OUTDIR--"]) and not os.path.exists( + fixpath(values["--OUTDIR--"]) + ): + ch = sg.popup_yes_no( + "Output folder exists... this is probably a re-run ... proceed?", + title="Rerun?", + location=(0, 500), + font=("Arial", 12, "bold"), + ) + if ch == "No": + window.Element("--OUTDIR--").update("") + continue + # sg.Popup("Output folder exists... this is probably a re-run ... 
is it?",location=(0,500)) + genome = jsons[values["--ANNOTATION--"]] + renee_cmd = RENNE + " run " + renee_cmd += " --input " + " ".join(inputfastqs) + renee_cmd += " --output " + values["--OUTDIR--"] + renee_cmd += " --genome " + genome + renee_cmd += " --sif-cache " + SIFCACHE + renee_cmd += " --mode slurm " + # if HOSTNAME != "biowulf.nih.gov": + if HOSTNAME == "fsitgl-head01p.ncifcrf.gov": + renee_cmd += " --tmp-dir /scratch/cluster_scratch/$USER " + renee_cmd += " --shared-resources /mnt/projects/CCBR-Pipelines/pipelines/RENEE/resources/shared_resources " + run_stdout, run_stderr = run(renee_cmd, dry=True) + if DEBUG: + print(run_stdout) + if DEBUG: + print(run_stderr) + allout = "{}\n{}".format(run_stdout, run_stderr) + sg.popup_scrolled( + allout, + title="Dryrun:STDOUT/STDERR", + font=("Monaco", 10), + location=(0, 500), + size=(80, 30), + ) + if "error" in allout or "Error" in allout or "ERROR" in allout: + continue + ch = sg.popup_yes_no( + "Submit run to slurm?", + title="Submit??", + location=(0, 500), + font=("Arial", 12, "bold"), + ) + if ch == "Yes": + run_stdout, run_stderr = run(renee_cmd, dry=False) + if DEBUG: + print(run_stdout) + if DEBUG: + print(run_stderr) + allout = "{}\n{}".format(run_stdout, run_stderr) + sg.popup_scrolled( + allout, + title="Slurmrun:STDOUT/STDERR", + font=("Monaco", 10), + location=(0, 500), + size=(80, 30), + ) + sg.popup_auto_close( + "Thank you for running RENEE. 
GoodBye!", + location=(0, 500), + title="", + font=("Arial", 12, "bold"), + ) + break + elif ch == "No": + window.Element("--INDIR--").update("") + window.Element("--OUTDIR--").update("") + window.Element("--ANNOTATION--").update("") + continue + + window.close() + if len(FILES2DELETE) != 0: + deletefiles() + + +# ./renee run \ +# --input ../.tests/*.R?.fastq.gz \ +# --output /data/${USER}/RENEE_testing_230703/RNA_hg38 \ +# --genome /data/CCBR_Pipeliner/Pipelines/RENEE/resources/hg38/30/hg38_30.json \ +# --sif-cache /data/CCBR_Pipeliner/SIFS/ \ +# --mode slurm + +if __name__ == "__main__": + main() From 15938786330d13427ef9c7febaab5b6c362e01f3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 15 Jan 2024 21:42:50 -0500 Subject: [PATCH 02/32] refactor(gui): reorder imports, copy outside functions from ccbrpipeliner/src --- resources/gui.py | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) mode change 100644 => 100755 resources/gui.py diff --git a/resources/gui.py b/resources/gui.py old mode 100644 new mode 100755 index 987ee14..bec5965 --- a/resources/gui.py +++ b/resources/gui.py @@ -1,15 +1,20 @@ #!/usr/bin/env python3 + + global DEBUG DEBUG = True +import glob import os +import PySimpleGUI as sg import sys import stat import subprocess -import glob +from tkinter import Tk import uuid + # getting the name of the directory # where the this file is present. 
current = os.path.dirname(os.path.realpath(__file__)) @@ -23,16 +28,6 @@ sys.path.append(parent) imgdir = os.path.join(parent, "resources", "images") -# Check if python 3.11 or later is available and running -from src.VersionCheck import version_check - -version_check() - -from src.Utils import * # copy_to_clipboard comes from Utils - -# import pysimplegui -import PySimpleGUI as sg - global RENEEDIR global SIFCACHE global RENEE @@ -52,6 +47,32 @@ # sg.SetOptions(button_color=sg.COLOR_SYSTEM_DEFAULT) +def version_check(): + # version check + # glob.iglob requires 3.11 for using "include_hidden=True" + MIN_PYTHON = (3, 11) + try: + assert sys.version_info >= MIN_PYTHON + print( + "Python version: {0}.{1}.{2}".format( + sys.version_info.major, sys.version_info.minor, sys.version_info.micro + ) + ) + except AssertionError: + exit( + f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" + ) + + +def copy_to_clipboard(string): + r = Tk() + r.withdraw() + r.clipboard_clear() + r.clipboard_append(string) + r.update() + r.destroy() + + def get_combos(): resource_dir = os.path.join(RENEEDIR, "resources") if not os.path.exists(resource_dir): @@ -314,4 +335,5 @@ def main(): # --mode slurm if __name__ == "__main__": + version_check() main() From e5af71e567e007275377ea33dddc7f9d45e23ae1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 15 Jan 2024 21:44:24 -0500 Subject: [PATCH 03/32] style: run pre-commit on renee --- renee | 78 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/renee b/renee index 670d6a4..d099a54 100755 --- a/renee +++ b/renee @@ -45,7 +45,7 @@ except AssertionError: def scontrol_show(): - """ Run scontrol show config and parse the output as a dictionary + """Run scontrol show config and parse the output as a dictionary @return scontrol_dict : """ scontrol_dict = dict() @@ -61,24 +61,26 @@ def scontrol_show(): def get_hpcname(): - """ Get the HPC name 
(biowulf, frce, or an empty string) + """Get the HPC name (biowulf, frce, or an empty string) @return hpcname """ scontrol_out = scontrol_show() - hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else '' - if hpc == 'fnlcr': - hpc = 'frce' + hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" + if hpc == "fnlcr": + hpc = "frce" return hpc -def get_genomes_list(renee_path, hpcname = get_hpcname()): - """ Get list of genome annotations available for the current platform +def get_genomes_list(renee_path, hpcname=get_hpcname()): + """Get list of genome annotations available for the current platform @return genomes_list """ genome_config_dir = os.path.join(renee_path, "config", "genomes", hpcname) json_files = glob.glob(genome_config_dir + "/*.json") if not json_files: - warnings.warn(f"WARNING: No Genome Annotation JSONs found in {genome_config_dir}. Please specify a custom genome json file with `--genome`") + warnings.warn( + f"WARNING: No Genome Annotation JSONs found in {genome_config_dir}. 
Please specify a custom genome json file with `--genome`" + ) genomes = [os.path.basename(file).replace(".json", "") for file in json_files] return sorted(genomes) @@ -722,16 +724,20 @@ def setup(sub_args, ifiles, repo_path, output_path): "genome": genome_config, # Template for tool information "tools": os.path.join(output_path, "config", "templates", "tools.json"), - } + } # Global config file for pipeline, config.json config = join_jsons(required.values()) # uses templates in the renee repo # Update cluster-specific paths for fastq screen & kraken db - if hpcname == 'biowulf' or hpcname == 'frce': - db_json_filename = os.path.join(output_path, 'config', 'templates', f"dbs_{hpcname}.json") - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), db_json_filename), "r") as json_file: - config['bin']['rnaseq']['tool_parameters'].update(json.load(json_file)) - + if hpcname == "biowulf" or hpcname == "frce": + db_json_filename = os.path.join( + output_path, "config", "templates", f"dbs_{hpcname}.json" + ) + with open( + os.path.join(os.path.dirname(os.path.abspath(__file__)), db_json_filename), + "r", + ) as json_file: + config["bin"]["rnaseq"]["tool_parameters"].update(json.load(json_file)) config = add_user_information(config) config = add_rawdata_information(sub_args, config, ifiles) @@ -1084,22 +1090,28 @@ def run(sub_args): # hpcname is either biowulf, frce, or blank hpcname = get_hpcname() - if sub_args.runmode == 'init' or not os.path.exists(os.path.join(sub_args.output, 'config.json')): + if sub_args.runmode == "init" or not os.path.exists( + os.path.join(sub_args.output, "config.json") + ): # Initialize working directory, copy over required pipeline resources - input_files = initialize(sub_args, repo_path=git_repo, output_path=sub_args.output) + input_files = initialize( + sub_args, repo_path=git_repo, output_path=sub_args.output + ) # Step pipeline for execution, create config.json config file from templates config = setup( - sub_args, 
ifiles=input_files, repo_path=git_repo, output_path=sub_args.output + sub_args, + ifiles=input_files, + repo_path=git_repo, + output_path=sub_args.output, ) # load config from existing file else: with open(os.path.join(sub_args.output, "config.json"), "r") as config_file: config = json.load(config_file) - # ensure the working dir is read/write friendly - scripts_path = os.path.join(sub_args.output,'workflow','scripts') + scripts_path = os.path.join(sub_args.output, "workflow", "scripts") os.chmod(scripts_path, 0o755) # Optional Step: Dry-run pipeline @@ -1133,7 +1145,7 @@ def run(sub_args): list(config["references"]["rnaseq"].values()) + fq_screen_paths + kraken_db_path ) all_bind_paths = "{},{}".format(",".join(genome_bind_paths), rawdata_bind_paths) - + if sub_args.dry_run: # print singularity bind baths and exit print("\nSingularity Bind Paths:{}".format(all_bind_paths)) sys.exit(0) @@ -1286,8 +1298,10 @@ def _configure(sub_args, filename, git_repo): fh.write(' {}: "{}"\n'.format(tag, uri)) print("Done!") + def _reset_write_permission(target): - os.system("chmod -R u+w,g-w,o-w "+target) + os.system("chmod -R u+w,g-w,o-w " + target) + def configure_build(sub_args, git_repo, output_path): """Setups up working directory for build option and creates config file (build.yml) @@ -1825,9 +1839,7 @@ def parsed_arguments(name, description): subparser_run.add_argument( "--genome", required=True, - type=lambda option: str( - genome_options(subparser_run, option, GENOMES_LIST) - ), + type=lambda option: str(genome_options(subparser_run, option, GENOMES_LIST)), help=argparse.SUPPRESS, ) @@ -1865,16 +1877,17 @@ def parsed_arguments(name, description): default=False, help=argparse.SUPPRESS, ) - subparser_run.add_argument('--runmode', + subparser_run.add_argument( + "--runmode", # Determines how to run the pipeline: init, run # TODO: this API is different from XAVIER & CARLISLE, which have a --runmode=dryrun option instead of a --dry-run flag. 
- required = False, - default = 'run', - choices = ['init','run'], - type = str, - help = argparse.SUPPRESS + required=False, + default="run", + choices=["init", "run"], + type=str, + help=argparse.SUPPRESS, ) - + # Execution Method, run locally # on a compute node or submit to # a supported job scheduler, etc. @@ -2090,7 +2103,7 @@ def parsed_arguments(name, description): Wait until master job completes. This is required if the job is submitted using HPC API. If not provided the API may interpret submission of master job as - completion of the pipeline! + completion of the pipeline! {1}{2}Misc Options:{4} -h, --help Show usage information, help message, and exit. @@ -2259,7 +2272,6 @@ def parsed_arguments(name, description): completion of the pipeline!", ) - # Sub-parser for the "unlock" sub-command # Grouped sub-parser arguments are currently # not supported: https://bugs.python.org/issue9341 From 681722f3e86cae38d995f4eadb9debb2d56f3328 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 15 Jan 2024 21:45:08 -0500 Subject: [PATCH 04/32] refactor(cli): add gui subcommand --- renee | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/renee b/renee index d099a54..f3a42f2 100755 --- a/renee +++ b/renee @@ -1508,6 +1508,11 @@ def cache(sub_args): ) +def gui(sub_args): + gui_job = subprocess.Popen(os.path.join(RENEE_PATH, "resources", "gui.py")) + gui_job.wait() + + def genome_options(parser, user_option, prebuilt): """Dynamically checks if --genome option is a valid choice. 
Compares against a list of prebuilt or bundled genome reference genomes and accepts a custom reference @@ -2468,11 +2473,18 @@ def parsed_arguments(name, description): # Add custom help message subparser_cache.add_argument("-h", "--help", action="help", help=argparse.SUPPRESS) + subparser_gui = subparsers.add_parser( + "gui", + help="Launch the RENEE pipeline with a Graphical User Interface (GUI)", + description="", + ) + # Define handlers for each sub-parser subparser_run.set_defaults(func=run) subparser_unlock.set_defaults(func=unlock) subparser_build.set_defaults(func=build) subparser_cache.set_defaults(func=cache) + subparser_gui.set_defaults(func=gui) # Parse command-line args args = parser.parse_args() From e7bc29009809d7c5be1c5d0e77a5b9727cfcd525 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 15 Jan 2024 21:49:06 -0500 Subject: [PATCH 05/32] ci: specify genome config --- .github/workflows/main.yaml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index dccf099..906eb2d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,17 +12,19 @@ jobs: Dryrun_Lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: docker://snakemake/snakemake:v5.24.2 - - name: Dry-run pipeline - run: | - docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 \ - python /opt2/rna-seek run --input \ - /opt2/.tests/KO_S3.R1.fastq.gz /opt2/.tests/KO_S3.R2.fastq.gz \ - /opt2/.tests/KO_S4.R1.fastq.gz /opt2/.tests/KO_S4.R2.fastq.gz \ - /opt2/.tests/WT_S1.R1.fastq.gz /opt2/.tests/WT_S1.R2.fastq.gz --output /opt2/output --genome hg38_30 --mode local --dry-run - - name: Lint workflow - continue-on-error: true - run: | - docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 snakemake --lint -s /opt2/output/workflow/Snakefile -d /opt2/output || \ - echo 'There may have been a few warnings or errors. 
Please read through the log to determine if its harmless.' + - uses: actions/checkout@v2 + - uses: docker://snakemake/snakemake:v5.24.2 + - name: Dry-run pipeline + run: | + docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 \ + python /opt2/rna-seek run \ + --input /opt2/.tests/*.fastq.gz \ + --output /opt2/output \ + --genome /opt2/config/genomes/biowulf/hg38_30.json \ + --mode local \ + --dry-run + - name: Lint workflow + continue-on-error: true + run: | + docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 snakemake --lint -s /opt2/output/workflow/Snakefile -d /opt2/output || \ + echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.' From b35eb6ccb266336914545d7ac25d88b1ec38286d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 14:48:32 -0400 Subject: [PATCH 06/32] refactor: rename gui.main() to gui.launch_gui() --- src/renee/__main__.py | 9 +++------ src/renee/gui.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/renee/__main__.py b/src/renee/__main__.py index 83cad92..fee3a6b 100755 --- a/src/renee/__main__.py +++ b/src/renee/__main__.py @@ -23,7 +23,8 @@ import argparse # potential python3 3rd party package, added in python/3.5 # local import -import renee.src.renee.gui +from .gui import launch_gui + # Pipeline Metadata and globals def renee_base(rel_path): @@ -1551,10 +1552,6 @@ def cache(sub_args): ) -def gui(sub_args): - renee.src.renee.gui.main() - - def genome_options(parser, user_option, prebuilt): """Dynamically checks if --genome option is a valid choice. 
Compares against a list of prebuilt or bundled genome reference genomes and accepts a custom reference @@ -2528,7 +2525,7 @@ def parsed_arguments(name, description): subparser_unlock.set_defaults(func=unlock) subparser_build.set_defaults(func=build) subparser_cache.set_defaults(func=cache) - subparser_gui.set_defaults(func=gui) + subparser_gui.set_defaults(func=launch_gui) # Parse command-line args args = parser.parse_args() diff --git a/src/renee/gui.py b/src/renee/gui.py index f4eda5e..1b94212 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -122,7 +122,7 @@ def run(cmd, dry=False): return run_stdout, run_stderr -def main(): +def launch_gui(sub_args): # get drop down genome+annotation options jsons = get_combos() genome_annotation_combinations = list(jsons.keys()) @@ -336,4 +336,4 @@ def main(): if __name__ == "__main__": version_check() - main() + launch_gui() From 2dbc89bb5409681d4d1f65166b8a6b3bbef63cce Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 17:22:20 -0400 Subject: [PATCH 07/32] chore: add gui dependencies --- pyproject.toml | 5 +++-- resources/nih-bnfo-logo.png | Bin 24218 -> 0 bytes 2 files changed, 3 insertions(+), 2 deletions(-) delete mode 100755 resources/nih-bnfo-logo.png diff --git a/pyproject.toml b/pyproject.toml index c4d2726..953aaec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,12 +33,13 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Bio-Informatics", ] -requires-python = ">=3.8" +requires-python = ">=3.11" dependencies = [ + "argparse", "Click >= 8.1.3", + "PySimpleGui < 5", "snakemake >= 7, < 8", "snaketool-utils >= 0.0.5", - "argparse" ] [project.optional-dependencies] diff --git a/resources/nih-bnfo-logo.png b/resources/nih-bnfo-logo.png deleted file mode 100755 index f31bdf239fd1ea464ff7b773aed1cf6e6108854b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24218 
zcmeFZbyQu;wm!IVcMC2-gR^mW0>K@EZQR}62^xaCYtY~XhY;K?xLa^{=$#|?o^#*5 zuixum|I=eM16ix8=A1R>H>Il9+Jr00OQ9eVAOZjY6zR9($^ZbA4EXv3JS_P4JSO27 z_z#4WvXm&Ga+GKvd_!jRR#^@J@T3L+{DT32dvKHg9suCN0s#Cp004MX002DujIT=k z;0^@)w^~jB0MqMVKM?MPLatybloqO*&YE(vyhe65Odw-BLlY)<8+-6*0D#|}7kp`B z;tV2lx3RW$;&m6G_-zC)`1)5fGX>diL!7MyC^Y31$;9j&O~^Qy*qK-;1QE%|$oL(N zO?j2YCI27?-w9BdJ3HI+GBdlmxiPr`nd}_Rm|1yvc$it(nAzAE!6O)*JZzmo?u@oh zuYYIqCm(SWCnHA-duIzfTe4q#L56lN&H@w^zXbi`>-Tv&TbTY+lC9GpYJnAG{?)_G z%EZF_@5oHtE&c=9ub%%%W^D8it?XSKt$)+W*ofK0+Qi1h*4YV6$NF!@fo&@%_Ycy) zP17A@|C=})n}18` zWHjXgvU9S4OgW5!oczrHk{86n{G3Kf?GQQTmJKHy`G8vNLsd138+An1K}%6#12Y$Q1wdE;2DOGBy@g4l-If zkdcM$FPEd^Xa0AD|5V@~GX4)!6{XavGmCKM5$O-lW zZX;73Ms^l9Zbohv9xg_1a2T<(ny`UPSlR!a<^KjfCy?v^kKMmS-e0f-M;;r`|0#AX zoF<&cAXXqFCmRnZBfBvh2P2P(F*u;PjCp_@hMa7mUy1&IA9?@JvHLw5|DGBD%(nkE zlk)$bhNb^&PWhi{_Fu^V89slo{!4-QeYs|AX9P}J|0v76CLlJDAvchn5yZ*P$;ise z1uh(hARa~#mk|$#5f3*TI}r4z0)JulcR{>jzoja1aJu^M5-fPSe;xTRJmkQuNMn#Q z=ua;H$yt7J`PY=^`G_= z0oRXTMb+u=Is3PwDhe|Dd+V4qyMAV7JHtFcBen@K z=nnu4GDvZMcytH~;r~}|PBZ=2yrzMyXe9iv>YaDbY?UxR?IvodH`Xp3&i1oJ9?8Pr zPba{CFjZn;D2oh_M68WzD>*o^S#Z8pLx-lXidnqb&T`u=$9Y-ys;P5xZYmT7^-&m37NeMe9HE)sHI=8)N%cPciI`j<0tezHIt1D2 z$j(ximt)Mw?Jht?-0enFt`;$-A?jJT{qjhRDX-f|dphUI$kXNXFpgLgXdvkCerhh< zNiVX$HL$Rr%-?d!DjmO7h>!?tQb>s`zqXJn)S;-WJDI65JU#tGC3_jTK{7yEC*ICa zh_u9sk3coIxO7 z*JmEf#L=JQ9wRUU%D${T_2T9BHMTheItf0vML2vhdenq5{NaC#D}PexX(X9MlilaX z-2tbk{>VH|D99VXn*H2`^LPOdH92XF!S?L?MyWVMib2W?IX&SlK65skJoEO=BYnZq zv9aPHarcb=%)hGN$qe}!rm(pE$D^O`w)m%Xn1GD#!C(aq;uP8l2}Tbb`L`dIS3BMf zQrIbUmMNgs$w)=K;qB<4*iG`}y8oA0XQ#~3R=l^p_(IPyoE&G_4NY=q_)wry{CRX& z4MXwq*Ca6}Hn&Z(Y2Rr8Z>%&iv5w10$f%v~p${+q#m`v6t2y_=*TE_7z#u2Ak2i3IzoZzaujSKMq@(AxmXDFQ0+3Y8p#O zcztya2T?j%&+o=NcMo~GM0YD*PDvrkx?6*YSK2xk3 zhslBf+!l*q+7!6JJLa1L`EhsVYWV9Y9;Y3a2NoL{u~Kk;GcbmnJ|Va}f-bWD%po(e zzVovaJKWI%5qhVR=XA{IP7Ti=y+=Uo`9#dKLOME<;k3AY5pDOSqU7`EXdw$l9k8pU z2?n$rr?FGKHae%S)%`)47QA00in(DAuY@3%jXV?kyD8kERgq*`p}}xf!>hhO0%5Ic 
z^R*Qf1$&8Dw|v{%#0tI28jYx$t*BdA*ti?xl!Xmo(6J#xl!E>sd_iHa@jYQbP)if> z%|gIP*^cV_-#zp+41gf>Jf7cF1D*8jlDC>A7NRjogl7wO8mE<(pG&B#L(~Opsr83r zet0;EH;V9*E8sN6E(W=eufo<9C&W;s5DtvPBtO5&1Z2qH%!Qx3 zu0H!3@S7)IcLg8~``C(kHY~d?uT=_pXZqMjDWnDnHypj4Si@>I5hKs;b^lVKu&+q@ z%lOCv((%+(KMt$$!e6%hi|CM!+eh=D2ybKi!+t3?o`DdXGo7*O>;F^GthZ~??(63< zjo4*e^(3+%3CT(7FArhiCm%n5S!i3e+Tv$=jNzfzd)yE4v-Rhf|8h>w^-BaAM9Xq8 z&B7NnBY&);nw3nB%DeHLwBOUDKQpxX$2jI`+87OJ*PXSZOK25XwaZ^xh!37^W?EWfj24RkW*;{_b$&VSpp0$uRoDL9%N1 z_s8R#QV~or%u7|@yjNP>rje{(_#w>Z=JI`oTXiJbLAj$iF zCj%5?y8tDnqjV8B3Z9=*Y>L>iyxOS3P;C{*K8a!m?8y7yFYl~C%2H3#DiOlJ3K-<| z`ccQ+D2ve>C$3jyUoEzN>cr>Md7-iCTauHv8AMv)4KLqmq>wmGDGCCs4zC4SZ5sET zq19jz^fmr9Sef4nGb{vV!YjpoX(32`6VjHgA73UX$iCSxu2kS1bdpY5j=z$9Rq20V^8+ifOo;9g% z#=vMi$pN3miK#Y{)RtoqdVgMGcbw2Kb4L0v8=D?0SZdc7EP#CyoM-)4z(`M~vlOm= z=-lZk=JUC~AjR62-8?3mzd2a;k}BL?tFw0>!^o8H90b*ezHjk&zK^L$!a z6d{Ye;SlV7{nuJ-p7qP=vQ7l{>IGF*XMP?@EUp>Au?A|S1P9~##gL3U1R#uFTwTq( z)|I@p2w4*!S$Ri*)6zd{**HJKtss<5R%^*D0j9g-^K5vr{Pu>!`Y>Fb%=L=^_s=tu zQU}3oRW&%TJX7!5?d>?sPhOKGKqtq)EegzMq4!7%O`qW9 zcE6ExcE0>nN?Mt9F?h46&l;XLkUV7m5nID`zUih@^Q};PqLhkT%R!~|Yhk)DJo_~9 zc$Q3kee!oK^GdXgqr(~Ty=Zfn-jh=_(JsVvjD7Ie?bfgnlOcuf4s=Tco3-eqgV?$@ zGVG~S{!+YEOr4IQ^Ty1Qe&#Ab_w}?$zIx!(s z3#2U!=wghbt@Y_szFEA2?Y8wjY2z6>b*d(%^@2>0x@X)3(JD&PNXZXCPqB$$7uNKQ zXT&FRgfThpfO$fEp7!!A>Md#@Iug?fg?R9AuVjt3c!-Gg6^ z)qDG+;csQgc!#YYtiGoZ^1ANVz;nDUcOD!XdY}EWr%~pbO}m|y&=;X`0^8=U+Ehq= z(iZOXz9DKs5-hk_)8qpyEjo)_(bX@p6pVSr6snqb1 z)bg|V5o_)1=L{aQl)a0!H7L_eBN0F{HD~)yU_40$jZS%udDnms+g56rGOsx`*)w`)Rjh&k1A*vL^TGGlDAVh8Ei#-}wR8QI@&C?`o6c z>y(hfW33j!9+)qmck3&;9sPK0*b2VWwDl?D3YD&64U?g>-hrm)@KS`*B8iSEf3YEWyYg9T<=&xBHmif+rUPY<{bZ5m#Q_L! 
zEDIqnM-YI#8JQCtua$0|b$qh5nxDn}DJ6Gq3-_l7;M|J6%by>3*{_nO%ENB8kQD_R zSg2b)HvX<@DlmRp@DTFc_Oc&g&CNtM74<~SH>6o9q6W2>o9A--iFY-7WnrBt#oi^p zA|TKVzOnZEQ$8LfwLC*&FzjxNT>;4W+Jnu5kNRGeTEVH{G=je^GIw?OR1&yRUKW+K z0z|qy>9qOom0C+_6Yu@@#haJ_1o|quq!}^*ku=o`+LsZRhj$E_*kGb%2=KAVXH{a#$|HS@5l`n9v-qUON*n?hU4>54Up&KMmu-*1N0d;%q@LoF5XT1Z zURG^J+H#tjb`1M>z4Y|Pmf2794}Cni%2%`f1ev4a9%_VQ^xtbTEw5fCEB{cu>_eS3 z#j~f}rXI#0>TFeQ25V(_U4&-vXHwiliJRbN__;NpQ80VNC&!#$7 zS#oi0p&RTX4f-5(rUd7B)Yne*-$R@rtKP7_&bf&7N0ND3=p`lfux%lYd{-6{!D}U( zV$d)*Gfx2`oUwUE4LbTUlN<7=@@w8_XK88cD2Ct&^6G z*oH@<(%WPcxp{6^V!ieoqT@~1prN{XQ?|^uexx>4rjY$e|5?py$I?wlN9Q@q2*}ySwderkXj*@Qr@13;#Imw;q$LfG8S?WxM~@Lf9Z2 zqk!sYdqC%MaX^~XL}-N;1BGben?#bYpv&};lG2$ME039$4xQ0*&MU}%U+SsysyR6* zk$scoVrIYOL++VfVZ8zKezOL8-7#m+28F@UcjkVN7JXl=_@AbI^}o>HkQNqlv~6y_ zb}NfEy0qsm=|O^gZ*cs2el4GQY!vS({EP*vFgi&YwfZL_F`v^%h(>6UbNlj~?gGiI zkNMhWsFnuD;hl9l>6xH5VQK%jVuYS+1(A0-E8MzJTtcLMU8Jo(a#voY&=3A z?}NZsZbQ!q29X|r7Y9V&G)q?1>ArChou|+=3D}g!W>m~LEBzmpGp+H^MA>zkmkCb- zUnit$c^*Hrn^s@5f2G>IPXQcxSmXp3f^)!-8;MmyvDL;2#o9e#_K*9ys<|WYqtH2x zpA^Ums#@_4wiSuc1bX=PlY%>?%*_jw3|49tYD-^Unkd=V+*ZW>`r7mMc*_I`dQP(< zwBH;1`Z^HZW*Zc2Vzw9TS-I^}olTz232-od&G|g&I+%!>&k&bPdmQ0zFuFclY5)9~ zd3t;=<(E0}Q|hge5Ir%in1!B5uhTO17_pkJS>&mNds3au$>Xg)dVgie<)G$Wa9q0B z+YM`jYs0aq^iXqH=M@$Q4zaMI)!^_zNaTB+t|5422lPM)0ve9t_$nQf;IG5*z(ogT zGOM2(8SG8hYQy&M$&iPAYsF|5W{}0uS^huY1buoQgqNPAl#$cTp!b;+A(}MqM19yX zUcj0ry_sEaf2|cy^V-2Nv9P%TC~a>4*seqC{BmHP9{oVIYv&GYXL#}^#R-O3!7?4f zqR>bPHanF7mpv$mm&vK(E8o3@w7UZp%~LM|Llg~g>3$sc`B;DHdTS*1R!i7m(|n^^ zAB13aykB~E$iSu0mfUhVWF=TaZ?2ceO>2l1|tW`kkfn22j%nX5T4wSUr=OojE_ z;poYIu4*fpS~}2Zz;Cuc2=CIyqvKuDF=9*XPjeVqt*76(<00hh-_}(zi`M&MTTC$& zIOU>Ly+M_(|LXEShL#2)7N|Z?u}{DNVoJ!BVa2d-v+th%udVc|wV-3KLOLpHP<7;VFqh>k zB5|!WOroYESBC|!lacQT0dpT!4Ep_1ogvD3*S8YqmDVYtc!4*g^9QY$5o-j&Tfwh% zd|VHk(-`T=&Ib1mAMt4AnxcjWYHdfQam3zJ-?p9%@_4w^)3gs@rtt@us#qwx$aB{6 z4;?>ewhSViWSA*$ljtAqf+JN?jGUXD#IfXOJ(evNck^kDFRdmwO!~e3kGc6_!|xAH zrd~e0)D5rQ{f3Bk%nt=e=U-fUkTVIgU`25f%wP|tF85DilB>zh8iKnC5|kV;ib>!% 
zp;EL!q$@c~wp~PUn#T8Tb&X|AG2uXkMW&+7lnqS&<-zf_{3%jB#+R-Je*z43_zHzK zqwV#o_hk1djc$QRkUwBk(CnQN2=$_ae!zXGmWoZKGz(yv69qzYF#>ks_Bx@ zU52q|_dwdiA~q?#pcWG`Z+@9U2+|Ap7vNp_rh+w#T3dp}s7wju)Vmd2!@$B&AGwfn z`;l+_F(M)kiQ-N|GGu;jj#(egTz7qaU6E~@hOJwtkZ79=dl^tg0|N0#XqYr{v;3-;l=vVjg5EP(1VZ(R^M#0Z zM1&LbI(ADFGLvrwivq@0X^%&x)q*zGEZnt;vb2fS##JvF-20r zp^)QzCM2CEkfMx=d0-ad%LCKl%nrT2?1_$rt&raI?nqU`(#AcBrJq(NHtLd0F^9){ zg=yHg^rvjyaVk$Esq7x<`7i4ZYAUw+$gW?tW$l;VMPM;y)D0jcj_UzI-F?%e&BZw7 zV+MSv58(u94jaK5NV$t@G+WRXgD4A=-==X^`u)KZ=1&RLI8V%Q-%#j&lxyA4h)D9mDMYcR}78s zmkf6YJozCe7)#tpl}<$khc%mSkf0Tc^nnh@k{#q%>uit=*AH=u73ainPsWYP?In6w z8^R=Yd@ki!p}DCnHTMdMqbjPtipTO zU3ad^<1Tof83w7Be$D%SZy_q%bdt$$o-e4^{16^Oe>0G#c0PzM>oyyV%ZI@TXFGPk zULY=M6la=v&F=9X@zD)i+#q(R(iwtA7fnNPZEI>!H-{FIYstWOCm=xW0D+nM_-%Yg zAZm*biqI#^%A;Xc5le4wxND&w*H`Au+~F6ua=UAvrpyRFzhq~Wt@4W$c6A~BK>Jxe zu54G-Um_PcGHVYJgi-ViH)_CYV-myC6ryQI z0AzVaHAKf3*hq-uZQIM#7gSK!xF3pR{$rjJ^^$^zzAEFYo5*tS?D2axe#TXW7~>qlSP0Z1^JofK?6hKc~Fu@x}9$UZQ1<^~j~CUB_G*qR3v>flq>#^**g`CD&YA2k&~2oID&v4$9Ll zy!zo~awO3E^yhDaKAF|89Df|ZL1SwsjO=WD{}Js@@L_zUT7xcBqoyVF=JB(U;V|p3 ztuUJUp=VxG)91*vYFI=Sq1+HQoXu^A0NV7p@Cc!XOYH>x6(ZpWH3PlWLTO0Z9rtIJ z0L2c+zIWpKH$=M;)wa!V%jU_}oY>w1au;b-Udwx?@p8?%?y!$ojNjzo7tWe;)-WTf zCtMAVneLqQmNUo4+!=L1I1%pnDz1F1Yu6b@i{|i)F`zhc>yz!(kP@i_-dGPF5npjs zDSj|Z@P!J-udEI*Ewa=+Um@RPaTiTVGsfvzv&%M{-nM>kiVt{Ipl}zz0O|t znpcr#JrAQVDd&R$atG{oEGH@{DNO`v3@0(Z$CZ2KJ4?v#$8pSaw@rNfVn2iB2$`DZ z9EbzU+h#EdAk2Y$OZ|rbipv}_kf$qgkv59P2nPOBLbMTEpPo_(z+&Y`5M0et3La_! 
zgoTPLpIi4%WT{dPH^-bStXnxphIqhT5>t^Y8?&;lgcfz$8)h@z6oSlq=(Uo~`!Qbc zahWE2lIk8EYegz_OGU285Wl8qjB+&gk~|IRSB0yw-|e${gTiTavP(ZNwLpnTKi@b} z;|mEoK>H|N>K%Np!6v7o1!cy)Z`6A}yz=4T>QWhq41|g899^n$Rq#DO5F`6mw`IcT z;GEsuw($LB0DNZpY~wmwQ5-?vWa?`<39tOB4sO_7ql00=w;)u*b>hk2Ly-LZF#okpg# zxUR|-BPyWm%v3s+srNb+M>S0$cX;c)qPjWPem`A5h1a1=VAvP3IoP4Re0iFz;uoYi zX+NX&5t3yx0f-;Fh}QE~21qkKczSp_;oS8BN||gC`&0yovSW;BXEt{zVV;6s4@5AH znGM4)@sh|$GLyI)~}g0z{=)YbOpvmaJ#2rN41-Ia>eTNm0ktx~2B z5Re2FtMRn!NwI&Obfbi3%m{j^u3o=$e~vgNi?#WY?Kh8v)Cgk|SEVWF z3gBK*TX-D0o>@*5us{d}tZ7PYq#$<+zS<4eaib_lHmE|=*rfR}GapntIWxqlXv%Z7 zy<5K46D*$$Q6;EPDyYckB#czRa`c=AIl-CxvLO%YVd`*fGQ9>A@+-Sv4K`;3a@5%K zk#qT=46cwNw5i_^8e_8QM{x+Qr}Qr#Kt-w!H*AVB(g1=yY};n%ar6#4*2xaVNQ2$z zIqr*H^eQjAbx1|`tH_+o-!P8oUUrsbt+w9{t@wp1&zF|#VFJiapTEC;ZI;FFgYivg zI08jm^1YJMeBE@*jvfjPW@_6kd!TUW*rWc9;QjgI$Pfu3m%R`@saUggS$ZuL=bpzb zac~_&`tmpaP(t;B>e{WeSofms6E_Q?$j(&p{==X%E`5$O2Vt2GuX8rHS}G0xy*(wC zxov1h4HCU)lme}oeDg^}K{0%W4>dvc$ybw<++fg+6o-Atcm3<+Y6oJp4NSd0vEG3~ zZ+bCka>2)^KH&>wF&BY*{>MYsihx^4g6g_ho$#CT5811keRZ{XzJc6=T}h$%3?r|j z-bGv>V%OL!)nO2@jP>6C^!S{oqKH(&A@>$h=GJjza}g&2ce}me@nb6zi22=tBy$u| z8Z5*umB!3`&&$)qj+gis^Jp^4r0O?GDX(t&Mu2|(X^rch%fGe@;3u{%zUghdJbS{n zT`IUU-w@PO02Q{5Wnr0W-~%yJv(nvtU^yhncf7f_)p=E=b8vb}4!zfe<<798t}ypb z7jT2@j$5(rHX8mO1&U+UZhCZ@G`GToe%+jeV@!%#q*=g)Z6d7#k12)KYui-vTgg+X zPL(LG!9>~Z^}&b&f()IN@o_N zx(EOAmMmrF0@oO=$Q=pt`g@jns0Ooc{gUZX*`k~)K1Z*xN1rcmHuGIIO%xGfp$#1R zPj7NzHYD)dR!>6$%y+UiBlf#|tlq5~Yvk((<$nE1&Pdz!*-x$NxT2$Z`w{g(buI-m zlrjLkJ->w_eL)ulBV%G>@-u#sCzXCSpjp3qog&wp=z`$ZUTa8Tp2(orho%$k zpOQFL%~m9E}m|LWo5BaVFeP7de26_F~^vE|=%zkwfzt z_|D>hOhysIFZWa|71kAy(k>f=a`C2wOa@x; zy`TH-=T)>7YCPLxD2-8~jHK3)10NaNaNm~wHR5dMRhQEA_%DJ4oyaf&{h5M6*ZTqf z)X56t>K*tvVRDtjGv>#ZuV16IWv-bUn&MxLa$!#vyTaPhH5H|H;i?8^`q4hh%5u|% z3Z%Xh$rfLrbT@LONhh%IJ}-3WlDKK=P``E~M+i5FfO|Mb%?U-o&46TMJ}neM@S_+jz^lCeq39FB(5i+=+FEf z&wujbJk^$``IJZ|PHfPoFwTbiRDIX!XPa@i*$sXo2u&Xq_r89Hxz{}xD1T9*)db;hCTL^t9K@eu@ z`evgzHxvQQKafX^_0TR3)yUtA49b%>D8i8~=#{dfjGT=YfbiC8_Lz_LbSm)-p9SQD 
zNL^k(OyHj%o1m(#t9!nBlYBy0Yw8E$(rYT(LH#Mb{FRa~gV|DC&u=TKA-mrE!Pp#Y zDo3tU)Na%4(!)Xs^=STj*Q6!prQ%B!Z?aGE;Qfk#)jkyZg1wM2R7!4>m^i3l!Z_O{ zEyjzoLeNMsgH(i-ggaMF@VV%9W=b#a-j8DPyL9s2xdZ?>$(fD&Ds;`d!k%l|$Pre4 z3}=PTg{8@Gyw@RhvidslOWP}ywaf^2dnovK7vw1QM))a1NXlN2xzhJYt7dEA@g*j} z;dMz{MX;BO+agDS^`+&GgOdf( z5>}zHfl>a1SF7@!oJ}6xk}}-fD*ma*VWiy6imStoevY|NZ3G--f=zEyP*ue}{f%YW z2+2+v{z~IK9I59;%b^|VZQq(exp&|mZXH6TS*Anb%!eTAgdvs%N@LhSWme?0VYm?t z^L%cs=yl?B6}}!$>svfgn9B&jwE$VLOd9w>FAmmtiGmqZo`+f|HNg*aaAr$U(hPKE z@25sg9k-Xfjl(rhDH44ErXEqp8y8ZfB&}O~Nx{{Vz&w;t5R(t)vRkpPiEt$pJ=zARepNm-PItQx(+W|7nG=ibV@^C9|g z@bLynwVIEZ3FVyWA1r?Slm+>Cew)GWHd19LE$4IJR>6{a5V_;59{g~$)kd|Lz3?!9 zPfj)Ca#qe_xiot;+SXEC!v>_I-YH2t4S>h824Zu#1c#Ok0m`XQ6p@VTWr`w0C;dYR z#NJe~L0Fm%Mx{A^xR)+pX0JY*jKk%?s~Kll@?jfV?D;HVn-F>i&wXO54cKvi4PA@R zo|s1blRWhx_q+7h4h*KDt6dUWy8(P<#crADlTTk#Jv=?{ zr*-U9EP4&@V!0#ITgD&zn&Dd}1bPlN&-v3r{{G4WyBDOn-Y#3@q^Pjt5sgUov8Yll zoE+dMLJkj!cb6Z2^(rGVgY4vZYo(w+DJi85PF8{2&6dfphi<|1UqLWb3Nal!*e?2Q zPv5RsSbcG*zt3bnSDplp*AsMmJ5Q}ay<{Z&Y*8fhJ=u6duV}Tq4JSdnHUTJ~NpyWE`3$8wB})VEBIwf_N+-DTQ1v5p`~o9u<312jUc zItK041_9ExF_0*7xWH!j#NyPuFOh(};94?2nF%U#%0bce|I`@>s&(2pP$R%jfhU9D zo@W}8HY9*QLKx2~2(dzM8p}x7Lwdt976juj=YF2Gax@hisO*MXCk1|??ez45w-V-L zCW9;vC>x6u5fP#HXnFC{-l%LAor;45hC zla^Qe>KX5le)M~j3dWz!V_X7=kn?Q}0kU!gavaEe9B18gl4w~5Q~gSCaO_u|ayeOO zNkdbNDXSBQ(kxp%aa{$NydEliNSSUDR4(Y9iQapJXB!&5eN?YLg}%i|)PVREJ)f0* zpE`+(SN+xg!5a+4Z&bG#9flJVAA+#%AgvyE=YMY8mYC~SN zvw}40xJwM*C`ori#BPFvgZ2GVc@HbrnXB9tnTYN}I0NLQWO)7Res+qVO_wHzOixQn z0rCt~G--Y9>vFtLA5nNVeJYM<%5G%KKR0abyV*UYg@&FXzlQdYHBPB%x(IQ^mpOo? 
zSs8FZ{Ghl$8YXBZ=D(22NkoTZgRr)x5*-V@qd3(m3KAh3POhr}nHQVZ`MyqJ7q}tF z-5>7K4k{%rQE~MbhFt_9gEiQIA13_8s%VqQmH^-hdx%2-V9t zHOoccGQNqa6`USpTwL&^ncXXXs8Q(P)l54yJ<*o?s?B(x$O;>VK55>CqJ1e#{s2nF zct1Tsgp!?`e7!Wu-UoJW1cIOKH1hZZsdh3lM^V~gx$xjuE^-Q_F>4r1SuvO&DaoZf z=4h6)`{Y*?pI}=)bUE~1e>h?TI$e`l=hF_0NxN@)`;ddIZ`F2Ovxhzu)3>Cz}hrM|%7Fk$qfw?zdITLXDgLa2?UDJpq70J(U`?ze~KlVGP zwjSC3%GYgN=D-6yRdkPd+oyBmf~dY>whXFb;ml#UA`16Tq>F&X4=7y-NHnQ9EHVfv zao04gO|M467g8ry;1k27I-B8wJKj=8Y}T-uty8=A4$v^8Ici7&Y%jO2Pb3Mlr)Q{7 zbE~8C#uE~G7O7YUa3D{hFYeZunuEoZ6(OC=cQUhwJPoB>5T0*vpVI1wY zTN*M#n$L=E9n?=+^WFTpF`obbOh_ zvvt>$(_HTP@N;^?GZ`w0v?&nYnrIi1MLP+J>1hL1++E{ClE+;5?{VL`WxsRh4m~ZQ?AG$ez{E1LcLtgQN?{O(Evt7*&NW%ogWH)Osow)0(COv z^-`rIDCS|NO!r3J3G`kYr3@X@EkHqdQR|hn%|3m-uLoOeSSB1XEvc{8MaQLo)LUuv z)mU2b)y^B%F7^_za9zH4ubDTn$}oud5Y3_yVQ3@&^87>}HN5l1@A8hDS}@#7&qD_L zz0S*zN190bgN?`1K4t}dFSxalD;gA(P3jy@DP}L7`fYJWguXOZgB|0h4a()R&T5k# z3y0}hg);kbhvF_5d)XsG+v}M(q%u-Mkkg}=M(^mXqyRV6B9^i?m}x99j>GoXUJGhW z&?;<>9RVkB>@34QSO%-wxJk?A{kgI{ipQVq<2I>j39Zg1Ts~(Vbi&JhMo$aG9NrqT zJi_?s`xT*1W_uAPw^y2n;Ao-#{oTf`(`q}=Dqmk4rkNk|2rl0BVfdw@^&pYd?d8Ea zk%JAqMktfO|7o4q%M_91W_$QBvxmf&1KV32sQi$mwEgn6V3DRXFGAx6{O-zrjQ&j) z5qYZKJ*|}9Y|GWp20Dk1^Kw#DhN3{&*J#<9_WWVs1N+lDK6MIDhS<=eoUMY3=QJ^mW%r5P zGlS4;T-hX#Cn(Kj1obaPms3?-LLE^O2_s?Gr!NjCLDcdKp2E^yR!y9 z13(iFmP{j>R?oB0+5C3t(B&0>+O73ULyCP+#F!zC6LBk3tRLwSU#mkvO-+qm{L2D{ zgV##?L3eR}&xm@f2uiknq@vkIR$@_#;W*S|szw?uPN0DFWtsOnXh*w+xF{9YC$xZy zq}9auM$5g6rRoC^*Yx9x$0d8#$QTmE398PR?VO`mm3%4THWX~vTqz@h=%{kiDb|K< z8ggCIH^WbLY=_eR^TX_n@Yr1Jb_T9HiU7?Kq&-BtoS#do7rCyps;kympYe8cNMAuh z*=`UX_VVYqs)4mM$LKbsL~vz5bH@5{1Ru;))J(&S&$t9yWga8Q|3tQH=S$8^yN#_* z#eX-SRGK3YTY+Sm@wF~evtchFvN#`=fM$%*sE3~Ec(V7zN8q^~T7R?W)67>3Xhzen zYEjH{?bA*w1u2Qb!|g9aI<2KsR;xB1%b`czQX-i0@5;SiRzBV2NX8{Apf6A0a(xi> z?;PTz8P~=fsEaW>hQs?I-7OQVhrQ%Bf}LM)j4qJP+}rN9sMMUTv^7xptbM?k8qh0-PMbcaV=cj*zSVN5KN)pUn6 zhQrW5!qMNv&@plQ)+;{;ju&mj`DS)Jk9{~kp!m3x`s}Noj$*UefTATrH(Z|0lL8-F zFT)?SEwphsoq4p_crU{AlGviaudJ!GsK*o)xB8*|XcuLF-Ze44%)3(4Y6?`ry 
z_a-Hip=2#6j|#)a)wZ3pCDjWtAuGea2K>Jc0+mtI(DqV=3M&M_3M~K&vC~N)b}95a z>yag?Eb4s4Oh2j35@mSZDq`V9qzC7ysv9A7tk0-bMHou(6_U!h%a#c}^Bk7nRDG?J zN|ay$xi8nT$(WG)ognxp^*v7r;K|_yCrPyaD03q^YH{2>d+!de2Q7tl-0Ua0N>j+6 z8DAS=+EE-M=HKaebqo38+Ih_Ji{T*fhM?|P;+1@y+CKJH{?w`6213?6#tiw`Y>BQ#Y+!g|>6`qHP<*edhruixYt`9BM=_+>AO4h@}o*PLZSJYsm z&|nDA^o{XIH4UexKE`5axec!+Dczz{3;&Zm1`m%|n+DzFM zRUe@I#@}Bm!g58rv?4v@6!e9DS+$B2&t;t`2v$EGn;}41cb_id5AR_vfp#xaDpwJ4 zc|)5*m=7!Tsz{S}=54xh^rRj44IXe8Id7h+WqhB#%q*=H0b&I$DolxG71@jw2UsrC zM-6_|;8W%8yMI9?A(7l=tTk)Rz{`98O-on1W1-mU;6~17(eY!F`aooL8{pL5K^7kyccEo_Y>=cOt|YRCxL|+Y*;BL+$$!^;0u3iSV5@tPLDwaWp5uxO zKbR24{7bbTFEFBIF*n>IFSyt$X8P_hY%v)3DArI*tU4@=DvceQYw#)o^-KP)-|LD3 z_Jy2K*T!0Gl>?PUBy_hrRP!TN?!Jh#*KPB}J6%5W&B#U+MSl~h^?N-;* z5)qDhSWOdPmU(U`T3h2MK3TUK^FmmFaheHEcI#Zq1qLk}q1-fd2EgR(Px#o^EQZ`E zD(uK$6NO=^+Qdb6j4+rs33@&ELhz#(MRX#ZxEWNs=8}a$6`9TNd6{E?b#V2V%~;OvJQ8w0y+Xg^4$f6kQ(R3x>b6BlWxNI)H5@hd+Ab(VO)!bUoom zM9-7FTeXMUJ=4h)ZdKKm)29#t3VP)~le_-rQ&uQZRbBU4kJy3#o^LLN7}i8!cdKSF6J(MQPIcTIuV|I67!`;l_wHV}QT}(a9;c;Wi;vmVVG&cMyl;%@d2la(?D4UD4#8+(O73xE?LvB2udd<)M zalrwdl$eE~r~$aTl(43ADY?Xq*x)2e%X-5~_L*m!I548m^LB14jt&+O-Q;=*Ys8HUpH%Fb$Mh$N&n+)NtSr2@I^%X1D`l`fQ<%*-2u~iWDL32a4&8&^Z%cCH zs>&RA29ndg6X6y4R4Bxs1E~FtpqPgHcaYMkbgphpyoDj)U?^GYjhOO8R`8iFXt?Fm zTE37x$fd~QiFh`-Ilu8uVyrEuIf-+C4>Rdu_bJUb$KG`US4H+9*TiFueH`Z7bE<1!5bu@ug@O<8q+UT_nW z%C|0~m5E^Sq#KP*r?$O24)4jL*3e%V*SaIr+d#x}IL@M%owMdjHFz|l-eIsBzc)4O zV1-9}nK1Nrj9w?>jKVo;Qi!~W%V2-35JL1~p^gdq67S{yDFH{12J!;Y8!d(A>-yCz zO+PcGpD#@xzi6VRMAD|2=f!SKkIxk6UxiWA(~_6CJRpwgwO@_9r^xnvn*6@vmUk6t zgSfx$uy$gmb6G<6jc9wutouMnx89&9p!nC`lKE(!_o4YLx$R*=oFg|H2NsTMkIc!S zGnVHQDjZMWHxgd@V~ly_#y6{tGKp@^)w!&9Zm!>hDAO&_iA+{WocB5rcwMg75HlON z6j;;Pjd?jWys$oYhmwKUL*%!UY3`pG1Gz_;1{J!gphtZbc0Q&k+OxPP6N#!hiRvQozVcukAA~8_t*stiK6f}+RxSWfW%iUbQYdp~$-Uv70 zQxanbKz{1pObhGO)!VRQgvo1wg}Hs`>E}huXm~Wvxz4KC5i{y515-|T z>RXhLD058|O^#3vyFu>SbkVg_?9B{^vJwNTUnbrp72$&n$31!;_P$TbSY+W*nbU%k zC-N0Uf+lEkr$_F*{xfHs>Oz=GIg231 
z>2CMxujO8a)F`HeDGCx*CJLVAde8Mq3jWMIy^s+Lli<26b2&&jyqE4<2vo-C5hG+b zJSJRe<(%x*z1*rPPHb<0)J0u192+{r8H4m%E3$uHe;5%a38HJ#$tqm~ERo{*vt^>; zz-@?6g6t=6d%W9LInC=ed1iA zY*=Jg&C`eQ!?Uv}Mz^N(ic#Kb6XI5{kVx|LK5i1sTKQ~mx*SzK;^pM-qaHWAi*H0i zuG0X9d1ORGpc82#nNoI0bH(Kjz>Tj0WM06}do7h~$uDmP-%2H^uFN|MRxk7`^&~x1 zLbXpr`C315N$xaLZ!UdM0~|ZzrkWU_R0O1kc%8c_*?~Wrw-6b&fo#$iIMg?S}|`?Ct=g z(YliIZoVWWbVH|J0A^QGxE7ydcY>$!qc=abT(XxGv#8+r`cjnbKDY|5wFITf1^E4(=l9%=W+Gty_7GhBaTl}l zBzhP>S1pE^QUq$GTjOWjJL~*bUPKzULxb|a!5T!$9$+4Db3y8Wt!adnIS~qhYj7n{ za?9r@6glHI6-!AUU;oDOUFl}Wt_qc(Te7{Xpr}w|A}hklIq5OB-5ML19Q3SiB7&)> zZ>4Fq@O}eg|BFOnVPHv;bDAMxwT3m>mf{rM0!l^L`Dzg=7M5mEDkz3!0dIgFAoug@ zB)$}`Ch@3b#S|@h2YOyP1>&BWosYBSrb?q1z8WdrJ_|##(77`<=vi<62niVpChJ<; zP~rXdL(36UOt=FYf&vNO0@Kv!4_NO;$)Img2{gOyy_2vAP$Z7z`urZc;@(!5#7W{^ z%3yHP7#Mp&PnNX1)s2k(=EY2RP7Vbbp~4wIoA>}~Y6owc&J)rR2Hd44nlEtpmsG@I zYhOxj+*7)=;^!bv%!qm{1u4sI+`xV}3x)=@8P zbmUCXyfACe9AaOG=$NI#jO0gSWj5`PMQ8v5ZQ#Jlr4P*rgrie4!k;tdS%{|J_vxwS zK68n$561?g%bcxegK(qz=FnsvWkpjqcRj{{$=TWCodwPDbiuVc}!O-qViDU1L(WEi8tQpUl-| zsQ;?>aT-(Iz0bs9bG$mncTCoVGL;oYoUtX4wgRN+d3?%(Zw#+o5(&q;Z_lTgN!W+$ zOx5p3YH#N8<=3rrh+tG7;-EnX{ksn`Jybw}PN=@rgj~9=KKYi|O&MmS|nhzlhUQ%JUSF?1~7K zafnlxxw*@^>c;CLVN&P0jo0EHU0Omecqi9m31&k2yFSz9g@yV9IId-^5{D`$CIMl7RH93-+IM z9?F&njACjlDk?5l+Qq~bCk~7LED9e%?JU88zsz5U-_*SM5*2ar*g;9u7Onl>Zz0|Z zF;$0sT74*eZ&nmHmHpzwxpP3`RJnZMq=$V2keAn*p$K;OvlnU#=Buw$e)QfE3oF%k zBZ%HpO_MKLTTV>8T-s;>gN<$E4J9{Z3qWj8r5$2k&ia`3TXbItASq(c`;5jYg8(~# zFk{my#huci;(}??bjuQDQ_HdJLAMv7TfG~v1g2!`Txg|I@0-lz=ySD$*Xs6eYX-dY z!Y@vIgyE)S#d6?R%=@c~DxXg=S8AuoH+MDuI_yifFqL4 zt#fO3vsZ`Bo?SMpG57`#f3{VtvPR_c%-#J3i1a;T$A^xV?7WNWE29ZkGMHm!dcWdA~z{ z^|=$;KjYDalHy~6GH4#!)0f`B_!lMoAH)_8AQDSBDWb~=2^GIaq$yY6;ep6>f@Rrn z`r-H2_NK8|%zk9pW{Usm7~GcPwd0JV1F_^VS#7^el(FKh!5Lb2n@YMjQ2J=m_+qry zWN7hYBx~}Wns9U3UV&D~Mn`S_5q-S)3hPPjz63+Oe`3d3e~EOJ#y!CrXZ_6Qjc@f! 
zVxWqvgdy^*b8z<`0m^GE)r)0}=KF!}40fDpuL+~X5xyEWk;jZ@HiSlFI*Dr{&b9Y< z2qWq`KI{q~uLFET^87>Nmx%j&`c1`mgKFhE5KR46aM-b+O9G~e1&ll^oqr?CV|3LK z&AhvrV|@1II4U25z>3{nQ{)_M$~$x_w>b-9iImE99=!<7SEWB=y^+E}#KBX>MC4S$ z(=9W4IN6S?xN~PWb;w6@TM$dT5WzRN4M0bYI6 z8{$?WBQo|fjjFZ)HIgc&CyLjWSH5R88`ltQ2wu%tS|&4}BpSXY3Val{;O4#CxBKab zJ!ZB4Ooa-$5@p=mS<t`cn820N5H?r!74Ce|Ppmg&Mo zGTr^_d_!3ajartm{N|aB$PG9pz&H8TCVYW$4D2?D1w=ZQT&%~{=Ef;CrfgI0%-drd zecc*Vx6y5M#9ThrFc@k9<`1z1)*wXegK;3LqU}} zA%>U}Ywoe^=zM}ZlhJ1(48$cQQoh38BOk{&w(A-h2?ZdiGQ3aPjv^rV0c#{EIr=^7 zekSL2r=m1hu74_u8vr8avPP@d7Di>=E8{uzwp3_oR#M@k$=3p9Z{6-!r-{s;*;z~6 zJ~DmM_n|$4&*1DUyj3A);GD|r$~Ds8a}`qVoa-@kV~gD2Y8(!Sx)`93>mttIj!W?T za>`G{TvAPSGGWYU`ddf;i6tlT9sHfzVeEjnbF3p`_*m5-MbTtkkFN1)359cWW3x?y zcu4SAz?p>z(sM`u<<0T3ku0Y+Xc#2p$!8D}I@8!}OAtP^ zjeIFQich0=DJjKruOvM!C~*9*4fZUAopc~99wtroXk(s_-jF|Tgg5)3+kVWm$506D z;fX+$!I5}xVWO-KhaXxU?`)Q+|5Ahc$xI2IT8R8nFARB!CN$JiediHC)GRWBL=d5u z3Go;v-u3;upfe-uCiYn@CrEeEfI>_~sir#DS_9;IaC_DM{yt#M!N}!ojvB8KA68U3 zOk@f(`SAX9EI)wuCro6uqU2%*Ns;D9L%v83mi@k6N5@5uz?FoFZ2 zk~KlJWs4qH{ZkoRO>=-q+_%~0KA^;|2DZE7gw!5A7&(7A{H$wFJAn9~hbOdGlHd;bV&A%8m%TmPnxl#!w`LRh zv%#OdNR)&zu?%j=xpRnjrnU$@km4l?h~O0sgLrS=H9%!E#%{UU5N{Cp;){oQ`wzS* zsNgzL3J2Sm(0*n_+&>N?ccRe|u%FI)47<`)Y1#JcFkJ-LyGc!MYAhZkW4> z@D_*p*DMEdp%L?1My3G?U;#LHRA^R)|JK~6{A!o)VF8BZBJN&i{4lry<8rf=8bI;cpC zBc4C+ldXq;MJThX z@Y^kY_D?jr3+xFhC$t+izkPM@bc=9sPOdNQHYcMGMBtafQE7)Y>G85hjvgaB$O}uA z+i#r78G7gJ&K_+c0;gXfZm#NL(O;c+Z(Qz@P4LV^_3LH)EAawryxR<()a_+4T;ijb zU$sC&A9Xg=kK2}w_+}2=yR`L-F0yju15P;)*)}x7{}HwQoy;5*bc35mXq$~H3H67@ zvb2f4VF#L^gUKTq>mI-#Usmu4u;3{}awij0f z0se+$ZqJIATHX`!`mY^)bx7zsTGlqCF4{bzqT=f~6Vm`$0IDP}$N?iHfHQWMrSbdu zoB4d6KXcR-cKgE05ls_^2O37k54JO&QEug#iSZ&Ww)>Tn)kbnPr#}PFo~tlQ7lE&6 z>zd-$@6&w}8y(*}v?au*$t%a0&r7+%bZ$LAAFJYzDJ(7RcB)L_<;`hwV=!@u-F6!) 
zRVh_DjLleV2wHpoGM4!^|Mp_M53dkZ@nc?T9w8lwq`jVK3(fT{M&-PxSm^O)$gUi% ziUzOY6b@$JLxaA~{oq$VyM0uM-Y*H<>rsNTk~h}NT eX}uYZCuASKo3_NU Date: Fri, 2 Aug 2024 17:22:40 -0400 Subject: [PATCH 08/32] chore: add CCBR logo to resources --- resources/CCBRlogo.png | Bin 0 -> 4717 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 resources/CCBRlogo.png diff --git a/resources/CCBRlogo.png b/resources/CCBRlogo.png new file mode 100644 index 0000000000000000000000000000000000000000..256ea2929cc032498f8d8500f4d4ca8f1037d303 GIT binary patch literal 4717 zcmY*d2UrtZuujBC2pt3j=|u=7p-Bx@dJ~Y2NRTcaLhk_ur3uod_ue}wQUnP|1VmJ% zH>DSmBH&x@z3<-lcE8=3J^Rl;XU?9TGrLc9byP`V3@{J~M5?Z)qz_27%L951I2&#? zssaM+rLU?8su^Yc0bE4dnyA}pYlCXaS z0`$vfBs=S05R8)yyNR|g>m4^wTUIfIAVQE`7RJiTit@CvlhRjG`Ntf%l3{niVBDpU zNMB!Hgs%v~&C?z!Bq=G06ck1Z3kv`U0WW`7jFq2&s~5-LB>&5!Wb0+^>FADebaQ3B z*;6v$BQD&$FP^>f}GcsduBcdp0l1u_WWxX4k=}E4Xn~$|>Cw2cI#M@6QYY;JfEXbDfVZ0=Jq; zSJXcTAI%oEMsRy0m2=A?i zVZ`2~v88rbw2{X51ww`$p=F%^;#2N|j9`KTAXNLVg(o)`J9l1hzf0Q~{ zh~;>VN_xzAPq~!syyHlS-Rb@lQgLfdh|6bpj02)q?hUV91w6#4GI}V5j{$v>1qP}b z0i{ifFf$U(n^@PQpM(ck1|J$0S&hmZeN#O5!pr!g{+t~wrCEmjC^KOGK&CvT6&^>K(7J&_+sIG!X?_&&Y$C&bk`3*+#!E!8G1fh zuKt9!=$_9stXC_XCFGdRwfp?nXqa{*vrf7oTSlFk#N;kZ@G^FyIbL%0#+pm}O6tLG zRpY2e7G~VR1+IpxcNE`ln$UTWZ&$uQlGt`E7nFQ+t)_F^^MQSCun#$X8mDHb#-no9 zW|A4)CjoUyLMF^}yw>-8@d$M2cRkdd-#vsZ{QRU+%|#_JePvALH_uVj!3bxXyM8a| z;XE;^3fIl=!XK}ffyH`8_L0k>D6MXRGBXOGWXpnScpdtrARn`N zXU*_-5(tDS{G9ab$`@dOlI#v(p6n4n%;U7CGVOEx?+@6fS%`h5jEm7r6@shLli2Xp zv^~_PkhYh{9lT==Rj1`wed3OTm$n&j;keoc#{J~nP~(N9)TQM)!(*IrG}pllNu(w;gm1%GcA8=+j6Bx_(~QDuu)koNQP zjr+dhJ+CUV6E%_jsG=MzAak}LJvi0Mbnt|)wrhOP#hjAX1RY+tkK-ep)?c#o^0D-a^H49+IjTk4dLb%|~WvU&P z<6@&-TP@S0&2ht!x|&rFpP{N!P&49nRdvf(+<|sRj_NRjxW{^Vsg4obDeF8FTWA!M zPxNYE0+6Wc2h|%9^P+W9rVnI4EM^7$98;x!O?K>gs+0Gdiy_Uz$CQ_YFw&fY4hAx3 zJiXD|Dmv^-uZ8Y*mp^5X;a~jnNFh$apWZ=Rd zg9cK5?e$R+O=KKBjLysY%~VzANwkHj!AE0oK&eR-bB*kI9>EES%t*i32&P1(=uWn zma_&{9{hdHp8FE<1924I8@xf!zrXfz-Hwp*kRIg`z2aynZ*hZ2{*K}t4W|Nb#E`OF 
zvv5Prw7@t@GvI+S&1;6Oi{Crs#+Jd&FABhjw)rc@pp%bj6MDf@mZ8BCz*2E0OXbKz zqf4J)(DiPLB5yO)jRjsD#inRP^fmUr^fCYadZ|?Qjuf$B{>G0<7JdtlBi^gPbGNZ? z^VM#nas0ssL-UO$M764VH$F@t!tIW!?3yQyh)I^Cm;)T@&L7T8F^TS{Afv!L>cwCy-;gavSfF?L1+2^_ z2|NuW+OYygFHZDI4>JeU#8({|%kRBFJMM zJzv}j-W&@PuHpU~gLo%P5>^`&^s_&c&Zykfh^Mby>8Vx>l?l|44XdUL&#$Z;77rw3 z334~1h9gVaPFs$MN^=4#e3`W8By(OX6Ug2sr~pUaBkK|IP2rvkI(Q$hEu7aOqrZ2$ z)jBtxq#Du8rxtVP6R~%7(nJKfxg@zVrG5r6tT)j*aQ$(nPIJ0ubTRpQ0f~myAX#0W z%QYUXH$8fqx!d&{iDlfIw-X*q1Cr}Wnus6yd}YQLJr#8fOUA=;TlkD-hyu>0a1#DW#4$Lq(f6gQo8H=Q6$b%lsb58R*g9bu=3& zEz8WbOyD3{Yv4=j9Z?uhaqK_Kk$|E?cJ1~?BQ2k=rR}xrfiE~aL60ID>FU9H%J&@P zjH7)hWjrNc=+be^zuPlRi_0CeQyHA((kJWT(Uh9<11$t{#!Q42Zk1hoJT3uht6^o2 zV)Tr*H#^dpvLEh$L;Z1OWrR4b?%r8f)$4pc+9<_~AuJrw zj1c-5P~U#tdU1%Bzs$U2DsNhOZ&V1AtZ1AGg zNd1&;?2QMp?bA_wuYRh_pD@!uMBXdW6qS_54|FC*+&w6ZU+Ib?tx>ik)sB0TB`R$o z*WsMV_Zc&xeNIh$EvPbIEhMMU2+c!4diSVA^DDg%)otw^jh-xk;F$|{_%zaP0hDf-*`L?iF z-6YX;Yn<%!l}YrsDQ0dwsH)pUqWo4hv05wXmbv$9_j_~pc> z&o-t(LYwAlgN2IP*V)iN=P&kc9Br`#LWq^Rr?$0@-AO(fo80T8HT#u*wFVT?`k>W8 z$QA9!gv@?uh7V0QYuQn{g@s?pZ*v za5JAvLY3a9Bc0(3W8zcgg`&uD)q9X|K8Slx!$wVKiAMT`w@O5VnV~^Nt7YM0hQ=}d z+ltDX%wsXUb44dpjyW&%Q3GvqvN=@=F83f z1c!atTvekFRoL(ZkqGa!YnfCeo}O0D8y8ADN^S(yx@xb#Z@kY~wZHeP zTNa{X^~u##sgrUehx+_?IPQqED9~EqEC{}%@$qe6BHTD0BAKAE!MT}&ErVXO=~$hP zcXmFQd->F(3T7*0;u&$>m#kOVRZv!*J2ZK8XG#}3aXaL;mzJ3574O|C=@mK2@712q zYb`I(M0jXu_uDvcFYd|H0T+uEX5E?#54ktTLZ*Mf+vt~z5#BG*QGObT$dY7q(D03A0iMvV;Zj(es{t!e`f-G(I%a;%kESdwlW}sRLp%!{Ez*u zH1TMjrrPU;&wd*yu)_*LAzViS-u9mrMKBqJ;e>pBUf8g2-|7%4?r^+?Jwlze?p#5A z9{H-xqo+EqLBC>sy{2JGp__+iO~c60h*n}bF<$X_H2kNOlPl{HMj?{hg6*Gxb| Tb{x;;KNodn9iZ0Zs4DN literal 0 HcmV?d00001 From a0116d41dc56693988d828d631c77bc7782af430 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 17:23:49 -0400 Subject: [PATCH 09/32] refactor: breakup main functions into separate files to prevent circular imports and make the code easier to maintain --- src/renee/__main__.py | 1163 ++------------------------------------- src/renee/cache.py | 
44 ++ src/renee/conditions.py | 25 + src/renee/dryrun.py | 89 +++ src/renee/gui.py | 216 +++----- src/renee/initialize.py | 144 +++++ src/renee/run.py | 202 +++++++ src/renee/setup.py | 329 +++++++++++ src/renee/util.py | 329 +++++++++++ tests/test_build.py | 34 +- tests/test_util.py | 46 ++ 11 files changed, 1334 insertions(+), 1287 deletions(-) create mode 100644 src/renee/cache.py create mode 100644 src/renee/conditions.py create mode 100644 src/renee/dryrun.py create mode 100644 src/renee/initialize.py create mode 100644 src/renee/run.py create mode 100644 src/renee/setup.py create mode 100644 src/renee/util.py create mode 100644 tests/test_util.py diff --git a/src/renee/__main__.py b/src/renee/__main__.py index fee3a6b..7019cea 100755 --- a/src/renee/__main__.py +++ b/src/renee/__main__.py @@ -13,104 +13,38 @@ # Python standard library from __future__ import print_function -from shutil import copy, copytree -import sys, os, subprocess, re, json, textwrap, shlex, glob -from pathlib import Path -from datetime import datetime -import warnings +from shutil import copy +import json +import os +import subprocess +import sys +import textwrap # 3rd party imports from pypi -import argparse # potential python3 3rd party package, added in python/3.5 - -# local import -from .gui import launch_gui - +import argparse + +# local imports +from renee.src.renee.run import run +from renee.src.renee.dryrun import dryrun +from renee.src.renee.gui import launch_gui +from renee.src.renee.conditions import fatal +from renee.src.renee.util import ( + get_hpcname, + get_tmp_dir, + get_genomes_list, + get_version, + check_python_version, +) # Pipeline Metadata and globals -def renee_base(rel_path): - basedir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - ) - return os.path.join(basedir, rel_path) - - RENEE_PATH = os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) - -with open(renee_base("VERSION"), "r") as vfile: - 
__version__ = f"v{vfile.read().strip()}" - +__version__ = get_version() __home__ = os.path.dirname(os.path.abspath(__file__)) _name = os.path.basename(sys.argv[0]) _description = "a highly-reproducible RNA-seq pipeline" - -# check python version ... should be 3.7 or newer -MIN_PYTHON = (3, 7) -try: - assert sys.version_info >= MIN_PYTHON -except AssertionError: - exit( - f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" - ) - - -def scontrol_show(): - """Run scontrol show config and parse the output as a dictionary - @return scontrol_dict : - """ - scontrol_dict = dict() - scontrol_out = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ).stdout - if len(scontrol_out) > 0: - for line in scontrol_out.split("\n"): - line_split = line.split("=") - if len(line_split) > 1: - scontrol_dict[line_split[0].strip()] = line_split[1].strip() - return scontrol_dict - - -def get_hpcname(): - """Get the HPC name (biowulf, frce, or an empty string) - @return hpcname - """ - scontrol_out = scontrol_show() - hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" - if hpc == "fnlcr": - hpc = "frce" - return hpc - - -def get_tmp_dir(tmp_dir, outdir): - """Get default temporary directory for biowulf and frce. Allow user override.""" - hpc = get_hpcname() - if not tmp_dir: - if hpc == "biowulf": - tmp_dir = "/lscratch/$SLURM_JOBID" - elif hpc == "frce": - tmp_dir = outdir - else: - tmp_dir = None - return tmp_dir - - -def get_genomes_list(renee_path, hpcname=get_hpcname()): - """Get list of genome annotations available for the current platform - @return genomes_list - """ - genome_config_dir = os.path.join(renee_path, "config", "genomes", hpcname) - json_files = glob.glob(genome_config_dir + "/*.json") - if not json_files: - warnings.warn( - f"WARNING: No Genome Annotation JSONs found in {genome_config_dir}. 
Please specify a custom genome json file with `--genome`" - ) - genomes = [os.path.basename(file).replace(".json", "") for file in json_files] - return sorted(genomes) - - -# Get list of prebuilt genome annotations available for the platform -GENOMES_LIST = get_genomes_list(RENEE_PATH) +check_python_version() class Colors: @@ -147,79 +81,6 @@ class Colors: bg_white = "\33[47m" -def err(*message, **kwargs): - """Prints any provided args to standard error. - kwargs can be provided to modify print functions - behavior. - @param message : - Values printed to standard error - @params kwargs - Key words to modify print function behavior - """ - print(*message, file=sys.stderr, **kwargs) - - -def fatal(*message, **kwargs): - """Prints any provided args to standard error - and exits with an exit code of 1. - @param message : - Values printed to standard error - @params kwargs - Key words to modify print function behavior - """ - err(*message, **kwargs) - sys.exit(1) - - -def _now(): - ct = datetime.now() - now = ct.strftime("%y%m%d%H%M%S") - return now - - -def _get_file_mtime(f): - timestamp = datetime.fromtimestamp(os.path.getmtime(os.path.abspath(f))) - mtime = timestamp.strftime("%y%m%d%H%M%S") - return mtime - - -def exists(testpath): - """Checks if file exists on the local filesystem. 
- @param parser : - argparse parser object - @param testpath : - Name of file/directory to check - @return does_exist : - True when file/directory exists, False when file/directory does not exist - """ - does_exist = True - if not os.path.exists(testpath): - does_exist = False # File or directory does not exist on the filesystem - - return does_exist - - -def exe_in_path(cmd, path=None): - """Checks if an executable is in $PATH - @param cmd : - Name of executable to check - @param path : - Optional list of PATHs to check [default: $PATH] - @return : - True if exe in PATH, False if not in PATH - """ - if path is None: - path = os.environ["PATH"].split(os.pathsep) - - for prefix in path: - filename = os.path.join(prefix, cmd) - executable = os.access(filename, os.X_OK) - is_not_directory = os.path.isfile(filename) - if executable and is_not_directory: - return True - return False - - def permissions(parser, filename, *args, **kwargs): """Checks permissions using os.access() to see the user is authorized to access a file/directory. Checks for existence, readability, writability and executability via: @@ -231,7 +92,7 @@ def permissions(parser, filename, *args, **kwargs): @return filename : If file exists and user can read from file """ - if not exists(filename): + if not os.path.exists(filename): parser.error( "File '{}' does not exists! 
Failed to provide valid input.".format(filename) ) @@ -257,7 +118,7 @@ def check_cache(parser, cache, *args, **kwargs): @return cache : If singularity cache dir is valid """ - if not exists(cache): + if not os.path.exists(cache): # Cache directory does not exist on filesystem os.makedirs(cache) elif os.path.isfile(cache): @@ -275,7 +136,7 @@ def check_cache(parser, cache, *args, **kwargs): # Check that the user owns the child cache directory # May revert to os.getuid() if user id is not sufficient if ( - exists(os.path.join(cache, "cache")) + os.path.exists(os.path.join(cache, "cache")) and os.stat(os.path.join(cache, "cache")).st_uid != os.getuid() ): # User does NOT own the cache directory, raise error @@ -292,944 +153,6 @@ def check_cache(parser, cache, *args, **kwargs): return cache -def _cp_r_safe_( - source, target, resources=["workflow", "resources", "config"], safe_mode=True -): - """Private function: Given a list paths it will recursively copy each to the - target location. If a target path already exists, it will not over-write the - existing paths data when `safe_mode` is on. - @param resources : - List of paths to copy over to target location. 
- Default: ["workflow", "resources", "config"] - @params source : - Add a prefix PATH to each resource - @param target : - Target path to copy templates and required resources (aka destination) - @param safe_mode : - Only copy the resources to the target path - if they do not exist in the target path (default: True) - """ - for resource in resources: - destination = os.path.join(target, resource) - if os.path.exists(destination) and safe_mode: - print(f"🚫 path exists and `safe_mode` is ON, not copying: {destination}") - else: - # Required resources do not exist, or safe mode is off - copytree( - os.path.join(source, resource), destination, dirs_exist_ok=not safe_mode - ) - - -def rename(filename): - """Dynamically renames FastQ file to have one of the following extensions: *.R1.fastq.gz, *.R2.fastq.gz - To automatically rename the fastq files, a few assumptions are made. If the extension of the - FastQ file cannot be inferred, an exception is raised telling the user to fix the filename - of the fastq files. - @param filename : - Original name of file to be renamed - @return filename : - A renamed FastQ filename - """ - # Covers common extensions from SF, SRA, EBI, TCGA, and external sequencing providers - # key = regex to match string and value = how it will be renamed - extensions = { - # Matches: _R[12]_fastq.gz, _R[12].fastq.gz, _R[12]_fq.gz, etc. - ".R1.f(ast)?q.gz$": ".R1.fastq.gz", - ".R2.f(ast)?q.gz$": ".R2.fastq.gz", - # Matches: _R[12]_001_fastq_gz, _R[12].001.fastq.gz, _R[12]_001.fq.gz, etc. - # Capture lane information as named group - ".R1.(?P...).f(ast)?q.gz$": ".R1.fastq.gz", - ".R2.(?P...).f(ast)?q.gz$": ".R2.fastq.gz", - # Matches: _[12].fastq.gz, _[12].fq.gz, _[12]_fastq_gz, etc. 
- "_1.f(ast)?q.gz$": ".R1.fastq.gz", - "_2.f(ast)?q.gz$": ".R2.fastq.gz", - } - - if filename.endswith(".R1.fastq.gz") or filename.endswith(".R2.fastq.gz"): - # Filename is already in the correct format - return filename - - converted = False - for regex, new_ext in extensions.items(): - matched = re.search(regex, filename) - if matched: - # regex matches with a pattern in extensions - converted = True - # Try to get substring for named group lane, retain this in new file extension - # Come back to this later, I am not sure if this is necessary - # That string maybe static (i.e. always the same) - # https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/NamingConvention_FASTQ-files-swBS.htm# - try: - new_ext = "_{}{}".format(matched.group("lane"), new_ext) - except IndexError: - pass # Does not contain the named group lane - - filename = re.sub(regex, new_ext, filename) - break # only rename once - - if not converted: - raise NameError( - """\n\tFatal: Failed to rename provided input '{}'! - Cannot determine the extension of the user provided input file. - Please rename the file list above before trying again. - Here is example of acceptable input file extensions: - sampleName.R1.fastq.gz sampleName.R2.fastq.gz - sampleName_R1_001.fastq.gz sampleName_R2_001.fastq.gz - sampleName_1.fastq.gz sampleName_2.fastq.gz - Please also check that your input files are gzipped? - If they are not, please gzip them before proceeding again. - """.format( - filename, sys.argv[0] - ) - ) - - return filename - - -def _sym_safe_(input_data, target): - """Creates re-named symlinks for each FastQ file provided - as input. If a symlink already exists, it will not try to create a new symlink. - If relative source PATH is provided, it will be converted to an absolute PATH. 
- @param input_data ]>: - List of input files to symlink to target location - @param target : - Target path to copy templates and required resources - @return input_fastqs list[]: - List of renamed input FastQs - """ - input_fastqs = [] # store renamed fastq file names - for file in input_data: - filename = os.path.basename(file) - renamed = os.path.join(target, rename(filename)) - input_fastqs.append(renamed) - - if not exists(renamed): - # Create a symlink if it does not already exist - # Follow source symlinks to resolve any binding issues - os.symlink(os.path.abspath(os.path.realpath(file)), renamed) - - return input_fastqs - - -def initialize(sub_args, repo_path, output_path): - """Initialize the output directory and copy over required pipeline resources. - If user provides a output directory path that already exists on the filesystem - as a file (small chance of happening but possible), a OSError is raised. If the - output directory PATH already EXISTS, it will not try to create the directory. - If a resource also already exists in the output directory (i.e. output/workflow), - it will not try to copy over that directory. In the future, it maybe worth adding - an optional cli arg called --force, that can modify this behavior. Returns a list - of renamed FastQ files (i.e. renamed symlinks). - @param sub_args : - Parsed arguments for run sub-command - @param repo_path : - Path to RENEE source code and its templates - @param output_path : - Pipeline output path, created if it does not exist - @return inputs list[]: - List of pipeline's input FastQ files - """ - if not exists(output_path): - # Pipeline output directory does not exist on filesystem - os.makedirs(output_path) - - elif exists(output_path) and os.path.isfile(output_path): - # Provided Path for pipeline output directory exists as file - raise OSError( - """\n\tFatal: Failed to create provided pipeline output directory! - User provided --output PATH already exists on the filesystem as a file. 
- Please run {} again with a different --output PATH. - """.format( - sys.argv[0] - ) - ) - - # Copy over templates are other required resources - _cp_r_safe_( - source=repo_path, - target=output_path, - resources=["workflow", "resources", "config"], - ) - - # Create renamed symlinks to rawdata - inputs = _sym_safe_(input_data=sub_args.input, target=output_path) - - return inputs - - -def join_jsons(templates): - """Joins multiple JSON files to into one data structure - Used to join multiple template JSON files to create a global config dictionary. - @params templates : - List of template JSON files to join together - @return aggregated : - Dictionary containing the contents of all the input JSON files - """ - # Get absolute PATH to templates in renee git repo - repo_path = os.path.dirname(os.path.abspath(__file__)) - aggregated = {} - - for file in templates: - with open(os.path.join(repo_path, file), "r") as fh: - aggregated.update(json.load(fh)) - - return aggregated - - -def add_user_information(config): - """Adds username and user's home directory to config. - @params config : - Config dictionary containing metadata to run pipeline - @return config : - Updated config dictionary containing user information (username and home directory) - """ - # Get PATH to user's home directory - # Method is portable across unix-like OS and Windows - home = os.path.expanduser("~") - - # Get username from home directory PATH - username = os.path.split(home)[-1] - - # Update config with home directory and username - config["project"]["userhome"] = home - config["project"]["username"] = username - - return config - - -def get_nends(ifiles): - """Determines whether the dataset is paired-end or single-end. - If paired-end data, checks to see if both mates (R1 and R2) are present for each sample. - If single-end, nends is set to 1. Else if paired-end, nends is set to 2. 
- @params ifiles list[]: - List containing pipeline input files (renamed symlinks) - @return nends_status : - Integer reflecting nends status: 1 = se, 2 = pe - """ - # Determine if dataset contains paired-end data - paired_end = False - nends_status = 1 - for file in ifiles: - if file.endswith(".R2.fastq.gz"): - paired_end = True - nends_status = 2 - break # dataset is paired-end - - # Check to see if both mates (R1 and R2) are present paired-end data - if paired_end: - nends = {} # keep count of R1 and R2 for each sample - for file in ifiles: - # Split sample name on file extension - sample = re.split("\.R[12]\.fastq\.gz", os.path.basename(file))[0] - if sample not in nends: - nends[sample] = 0 - - nends[sample] += 1 - - # Check if samples contain both read mates - missing_mates = [sample for sample, count in nends.items() if count == 1] - if missing_mates: - # Missing an R1 or R2 for a provided input sample - raise NameError( - """\n\tFatal: Detected pair-end data but user failed to provide - both mates (R1 and R2) for the following samples:\n\t\t{}\n - Please check that the basename for each sample is consistent across mates. - Here is an example of a consistent basename across mates: - consistent_basename.R1.fastq.gz - consistent_basename.R2.fastq.gz - - Please do not run the pipeline with a mixture of single-end and paired-end - samples. This feature is currently not supported within {}, and it is - not recommended either. If this is a priority for your project, please run - paired-end samples and single-end samples separately (in two separate output directories). - If you feel like this functionality should exist, feel free to open an issue on Github. - """.format( - missing_mates, sys.argv[0] - ) - ) - - return nends_status - - -def get_fastq_screen_paths(fastq_screen_confs, match="DATABASE", file_index=-1): - """Parses fastq_screen.conf files to get the paths of each fastq_screen database. 
- This path contains bowtie2 indices for reference genome to screen against. - The paths are added as singularity bind points. - @param fastq_screen_confs list[]: - Name of fastq_screen config files to parse - @param match : - Keyword to indicate a line match [default: 'DATABASE'] - @param file_index : - Index of line line containing the fastq_screen database path - @return list[]: - Returns a list of fastq_screen database paths - """ - databases = [] - for file in fastq_screen_confs: - with open(file, "r") as fh: - for line in fh: - if line.startswith(match): - db_path = line.strip().split()[file_index] - databases.append(db_path) - return databases - - -def get_rawdata_bind_paths(input_files): - """ - Gets rawdata bind paths of user provided fastq files. - @params input_files list[]: - List containing user-provided input fastq files - @return bindpaths : - Set of rawdata bind paths - """ - bindpaths = [] - for file in input_files: - # Get directory of input file - rawdata_src_path = os.path.dirname(os.path.abspath(os.path.realpath(file))) - if rawdata_src_path not in bindpaths: - bindpaths.append(rawdata_src_path) - - return bindpaths - - -def add_sample_metadata(input_files, config, group=None): - """Adds sample metadata such as sample basename, label, and group information. - If sample sheet is provided, it will default to using information in that file. - If no sample sheet is provided, it will only add sample basenames and labels. 
- @params input_files list[]: - List containing pipeline input fastq files - @params config : - Config dictionary containing metadata to run pipeline - @params group : - Sample sheet containing basename, group, and label for each sample - @return config : - Updated config with basenames, labels, and groups (if provided) - """ - # TODO: Add functionality for basecase when user has samplesheet - added = [] - for file in input_files: - # Split sample name on file extension - sample = re.split("\.R[12]\.fastq\.gz", os.path.basename(file))[0] - if sample not in added: - # Only add PE sample information once - added.append(sample) - config["project"]["groups"]["rsamps"].append(sample) - config["project"]["groups"]["rgroups"].append(sample) - config["project"]["groups"]["rlabels"].append(sample) - - return config - - -def add_rawdata_information(sub_args, config, ifiles): - """Adds information about rawdata provided to pipeline. - Determines whether the dataset is paired-end or single-end and finds the set of all - rawdata directories (needed for -B option when running singularity). If a user provides - paired-end data, checks to see if both mates (R1 and R2) are present for each sample. 
- @param sub_args : - Parsed arguments for run sub-command - @params ifiles list[]: - List containing pipeline input files (renamed symlinks) - @params config : - Config dictionary containing metadata to run pipeline - @return config : - Updated config dictionary containing user information (username and home directory) - """ - # Determine whether dataset is paired-end or single-ends - # Updates config['project']['nends']: 1 = single-end, 2 = paired-end - nends = get_nends(ifiles) # Checks PE data for both mates (R1 and R2) - config["project"]["nends"] = nends - - # Finds the set of rawdata directories to bind - rawdata_paths = get_rawdata_bind_paths(input_files=sub_args.input) - config["project"]["datapath"] = ",".join(rawdata_paths) - - # Add each sample's basename, label and group info - config = add_sample_metadata(input_files=ifiles, config=config) - - return config - - -def image_cache(sub_args, config): - """Adds Docker Image URIs, or SIF paths to config if singularity cache option is provided. - If singularity cache option is provided and a local SIF does not exist, a warning is - displayed and the image will be pulled from URI in 'config/containers/images.json'. 
- @param sub_args : - Parsed arguments for run sub-command - @params config : - Docker Image config file - @return config : - Updated config dictionary containing user information (username and home directory) - """ - # Get absolute PATH to templates in renee git repo - repo_path = os.path.dirname(os.path.abspath(__file__)) - images = os.path.join(sub_args.output, "config", "containers", "images.json") - - # Read in config for docker image uris - with open(images, "r") as fh: - data = json.load(fh) - # Check if local sif exists - for image, uri in data["images"].items(): - if sub_args.sif_cache: - sif = os.path.join( - sub_args.sif_cache, - "{}.sif".format(os.path.basename(uri).replace(":", "_")), - ) - if not exists(sif): - # If local sif does not exist on in cache, print warning - # and default to pulling from URI in config/containers/images.json - print( - 'Warning: Local image "{}" does not exist in singularity cache'.format( - sif - ), - file=sys.stderr, - ) - else: - # Change pointer to image from Registry URI to local SIF - data["images"][image] = sif - - config.update(data) - - return config - - -def get_repo_git_commit_hash(repo_path): - """Gets the git commit hash of the RENEE repo. 
- @param repo_path : - Path to RENEE git repo - @return githash : - Latest git commit hash - """ - try: - githash = ( - subprocess.check_output( - ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT, cwd=repo_path - ) - .strip() - .decode("utf-8") - ) - # Typecast to fix python3 TypeError (Object of type bytes is not JSON serializable) - # subprocess.check_output() returns a byte string - githash = str(githash) - except Exception as e: - # Github releases are missing the .git directory, - # meaning you cannot get a commit hash, set the - # commit hash to indicate its from a GH release - githash = "github_release" - - return githash - - -def setup(sub_args, ifiles, repo_path, output_path): - """Setup the pipeline for execution and creates config file from templates - @param sub_args : - Parsed arguments for run sub-command - @param repo_path : - Path to RENEE source code and its templates - @param output_path : - Pipeline output path, created if it does not exist - @return config : - Config dictionary containing metadata to run the pipeline - @return hpcname : - """ - # Resolves PATH to template for genomic reference files to select from a - # bundled reference genome or a user generated reference genome built via - # renee build subcommand - hpcname = get_hpcname() - if hpcname == "biowulf": - print("Thank you for running RENEE on BIOWULF!") - genome_config = os.path.join( - output_path, "config", "genomes", hpcname, sub_args.genome + ".json" - ) - elif hpcname == "frce": - print("Thank you for running RENEE on FRCE!") - genome_config = os.path.join( - output_path, "config", "genomes", hpcname, sub_args.genome + ".json" - ) - else: - genome_config = os.path.join( - output_path, "config", "genomes", sub_args.genome + ".json" - ) - if sub_args.genome.endswith(".json"): - # Provided a custom reference genome generated by renee build - genome_config = os.path.abspath(sub_args.genome) - - required = { - # Template for project-level information - "project": 
os.path.join(output_path, "config", "templates", "project.json"), - # Template for genomic reference files - # User provided argument --genome is used to select the template - "genome": genome_config, - # Template for tool information - "tools": os.path.join(output_path, "config", "templates", "tools.json"), - } - - # Global config file for pipeline, config.json - config = join_jsons(required.values()) # uses templates in the renee repo - # Update cluster-specific paths for fastq screen & kraken db - if hpcname == "biowulf" or hpcname == "frce": - db_json_filename = os.path.join( - output_path, "config", "templates", f"dbs_{hpcname}.json" - ) - with open( - os.path.join(os.path.dirname(os.path.abspath(__file__)), db_json_filename), - "r", - ) as json_file: - config["bin"]["rnaseq"]["tool_parameters"].update(json.load(json_file)) - - config = add_user_information(config) - config = add_rawdata_information(sub_args, config, ifiles) - - # Resolves if an image needs to be pulled from an OCI registry or - # a local SIF generated from the renee cache subcommand exists - config = image_cache(sub_args, config) - - # Add other cli collected info - config["project"]["annotation"] = sub_args.genome - config["project"]["version"] = __version__ - config["project"]["pipelinehome"] = os.path.dirname(__file__) - config["project"]["workpath"] = os.path.abspath(sub_args.output) - genome_annotation = sub_args.genome - config["project"]["organism"] = genome_annotation.split("_")[0] - - # Add optional cli workflow steps - config["options"] = {} - config["options"]["star_2_pass_basic"] = sub_args.star_2_pass_basic - config["options"]["small_rna"] = sub_args.small_rna - config["options"]["tmp_dir"] = get_tmp_dir(sub_args.tmp_dir, output_path) - config["options"]["shared_resources"] = sub_args.shared_resources - if sub_args.wait: - config["options"]["wait"] = "True" - else: - config["options"]["wait"] = "False" - if sub_args.create_nidap_folder: - config["options"]["create_nidap_folder"] 
= "True" - else: - config["options"]["create_nidap_folder"] = "False" - - # Get latest git commit hash - git_hash = get_repo_git_commit_hash(repo_path) - config["project"]["git_commit_hash"] = git_hash - - if sub_args.shared_resources: - # Update paths to shared resources directory - config["bin"]["rnaseq"]["tool_parameters"][ - "FASTQ_SCREEN_CONFIG" - ] = os.path.join( - sub_args.shared_resources, "fastq_screen_db", "fastq_screen.conf" - ) - config["bin"]["rnaseq"]["tool_parameters"][ - "FASTQ_SCREEN_CONFIG2" - ] = os.path.join( - sub_args.shared_resources, "fastq_screen_db", "fastq_screen_2.conf" - ) - config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"] = os.path.join( - sub_args.shared_resources, "20180907_standard_kraken2" - ) - - # Save config to output directory - print( - "\nGenerating config file in '{}'... ".format( - os.path.join(output_path, "config.json") - ), - end="", - ) - with open(os.path.join(output_path, "config.json"), "w") as fh: - json.dump(config, fh, indent=4, sort_keys=True) - print("Done!") - - return config - - -def dryrun( - outdir, - config="config.json", - snakefile=os.path.join("workflow", "Snakefile"), - write_to_file=True, -): - """Dryruns the pipeline to ensure there are no errors prior to running. - @param outdir : - Pipeline output PATH - @return dryrun_output : - Byte string representation of dryrun command - """ - try: - dryrun_output = subprocess.check_output( - [ - "snakemake", - "-npr", - "-s", - str(snakefile), - "--use-singularity", - "--rerun-incomplete", - "--cores", - "4", - "--configfile={}".format(config), - ], - cwd=outdir, - stderr=subprocess.STDOUT, - ) - - except subprocess.CalledProcessError as e: - # Singularity is NOT in $PATH - # Tell user to load both main dependencies to avoid the OSError below - print( - "Are singularity and snakemake in your PATH? Please check before proceeding again!" 
- ) - sys.exit("{}\n{}".format(e, e.output.decode("utf-8"))) - except OSError as e: - # Catch: OSError: [Errno 2] No such file or directory - # Occurs when command returns a non-zero exit-code - if e.errno == 2 and not exe_in_path("snakemake"): - # Failure caused because snakemake is NOT in $PATH - print( - "\x1b[6;37;41m\nError: Are snakemake AND singularity in your $PATH?\nPlease check before proceeding again!\x1b[0m", - file=sys.stderr, - ) - sys.exit("{}".format(e)) - else: - # Failure caused by unknown cause, raise error - raise e - - if write_to_file: - now = _now() - with open(os.path.join(outdir, "dryrun." + str(now) + ".log"), "w") as outfile: - outfile.write("{}".format(dryrun_output.decode("utf-8"))) - - return dryrun_output - - -def orchestrate( - mode, - outdir, - additional_bind_paths, - alt_cache, - threads=2, - submission_script="runner", - masterjob="pl:renee", - tmp_dir=None, - wait="", - hpcname="", -): - """Runs RENEE pipeline via selected executor: local or slurm. - If 'local' is selected, the pipeline is executed locally on a compute node/instance. - If 'slurm' is selected, jobs will be submitted to the cluster using SLURM job scheduler. - Support for additional job schedulers (i.e. PBS, SGE, LSF) may be added in the future. - @param outdir : - Pipeline output PATH - @param mode : - Execution method or mode: - local runs serially a compute instance without submitting to the cluster. - slurm will submit jobs to the cluster using the SLURM job scheduler. - @param additional_bind_paths : - Additional paths to bind to container filesystem (i.e. 
input file paths) - @param alt_cache : - Alternative singularity cache location - @param threads : - Number of threads to use for local execution method - @param submission_script : - Path to master jobs submission script: - renee run = /path/to/output/resources/runner - renee build = /path/to/output/resources/builder - @param masterjob : - Name of the master job - @param tmp_dir : - Absolute Path to temp dir for compute node - @param wait : - "--wait" to wait for master job to finish. This waits when pipeline is called via NIDAP API - @param hpcname : - "biowulf" if run on biowulf, "frce" if run on frce, blank otherwise. hpcname is determined in setup() function - @return masterjob : - """ - # Add additional singularity bind PATHs - # to mount the local filesystem to the - # containers filesystem, NOTE: these - # PATHs must be an absolute PATHs - outdir = os.path.abspath(outdir) - # Add any default PATHs to bind to - # the container's filesystem, like - # tmp directories, /lscratch - addpaths = [] - # set tmp_dir depending on hpc - tmp_dir = get_tmp_dir(tmp_dir, outdir) - temp = os.path.dirname(tmp_dir.rstrip("/")) - if temp == os.sep: - temp = tmp_dir.rstrip("/") - if outdir not in additional_bind_paths.split(","): - addpaths.append(outdir) - if temp not in additional_bind_paths.split(","): - addpaths.append(temp) - bindpaths = ",".join(addpaths) - - # Set ENV variable 'SINGULARITY_CACHEDIR' - # to output directory - my_env = {} - my_env.update(os.environ) - cache = os.path.join(outdir, ".singularity") - my_env["SINGULARITY_CACHEDIR"] = cache - - if alt_cache: - # Override the pipeline's default cache location - my_env["SINGULARITY_CACHEDIR"] = alt_cache - cache = alt_cache - - if additional_bind_paths: - # Add Bind PATHs for outdir and tmp dir - if bindpaths: - bindpaths = ",{}".format(bindpaths) - bindpaths = "{}{}".format(additional_bind_paths, bindpaths) - - if not exists(os.path.join(outdir, "logfiles")): - # Create directory for logfiles - 
os.makedirs(os.path.join(outdir, "logfiles")) - - if exists(os.path.join(outdir, "logfiles", "snakemake.log")): - mtime = _get_file_mtime(os.path.join(outdir, "logfiles", "snakemake.log")) - newname = os.path.join(outdir, "logfiles", "snakemake." + str(mtime) + ".log") - os.rename(os.path.join(outdir, "logfiles", "snakemake.log"), newname) - - # Create .singularity directory for installations of snakemake - # without setuid which create a sandbox in the SINGULARITY_CACHEDIR - if not exists(cache): - # Create directory for sandbox and image layers - os.makedirs(cache) - - # Run on compute node or instance without submitting jobs to a scheduler - if mode == "local": - # Run RENEE: instantiate main/master process - # Look into later: it maybe worth replacing Popen subprocess with a direct - # snakemake API call: https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html - # Create log file for pipeline - logfh = open(os.path.join(outdir, "logfiles", "snakemake.log"), "w") - masterjob = subprocess.Popen( - [ - "snakemake", - "-pr", - "--use-singularity", - "--singularity-args", - "'-B {}'".format(bindpaths), - "--cores", - str(threads), - "--configfile=config.json", - ], - cwd=outdir, - env=my_env, - ) - - # Submitting jobs to cluster via SLURM's job scheduler - elif mode == "slurm": - # Run RENEE: instantiate main/master process - # Look into later: it maybe worth replacing Popen subprocess with a direct - # snakemake API call: https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html - # snakemake --latency-wait 120 -s $R/Snakefile -d $R --printshellcmds - # --cluster-config $R/cluster.json --keep-going --restart-times 3 - # --cluster "sbatch --gres {cluster.gres} --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name={params.rname}" - # -j 500 --rerun-incomplete --stats $R/Reports/initialqc.stats -T - # 2>&1| tee -a $R/Reports/snakemake.log - - # Create log file for master job information 
- logfh = open(os.path.join(outdir, "logfiles", "master.log"), "w") - # submission_script for renee run is /path/to/output/resources/runner - # submission_script for renee build is /path/to/output/resources/builder - cmdlist = [ - str(os.path.join(outdir, "resources", str(submission_script))), - mode, - "-j", - str(masterjob), - "-b", - str(bindpaths), - "-o", - str(outdir), - "-c", - str(cache), - "-t", - str(tmp_dir), - ] - if str(wait) == "--wait": - cmdlist.append("-w") - if str(hpcname) != "": - cmdlist.append("-n") - cmdlist.append(hpcname) - else: - cmdlist.append("-n") - cmdlist.append("unknown") - - print(" ".join(cmdlist)) - masterjob = subprocess.Popen( - cmdlist, cwd=outdir, stderr=subprocess.STDOUT, stdout=logfh, env=my_env - ) - - return masterjob - - -def resolve_additional_bind_paths(search_paths): - """Finds additional singularity bind paths from a list of random paths. Paths are - indexed with a compostite key containing the first two directories of an absolute - file path to avoid issues related to shared names across the /gpfs shared network - filesystem. For each indexed list of file paths, a common path is found. Assumes - that the paths provided are absolute paths, the renee build sub command creates - resource file index with absolute filenames. 
- @param search_paths list[]: - List of absolute file paths to find common bind paths from - @return common_paths list[]: - Returns a list of common shared file paths to create additional singularity bind paths - """ - common_paths = [] - indexed_paths = {} - - for ref in search_paths: - # Skip over resources with remote URI and - # skip over strings that are not file PATHS as - # RENEE build creates absolute resource PATHS - if ( - ref.lower().startswith("sftp://") - or ref.lower().startswith("s3://") - or ref.lower().startswith("gs://") - or not ref.lower().startswith(os.sep) - ): - continue - - # Break up path into directory tokens - for r in [ - ref, - str(Path(ref).resolve()), - ]: # taking care of paths which are symlinks! - path_list = os.path.abspath(r).split(os.sep) - - try: # Create composite index from first two directories - # Avoids issues created by shared /gpfs/ PATHS - index = path_list[1:3] - index = tuple(index) - except IndexError: - index = path_list[1] # ref startswith / - if index not in indexed_paths: - indexed_paths[index] = [] - # Create an INDEX to find common PATHS for each root child directory - # like /scratch or /data. This prevents issues when trying to find the - # common path between these two different directories (resolves to /) - indexed_paths[index].append(str(os.sep).join(path_list)) - - for index, paths in indexed_paths.items(): - # Find common paths for each path index - common_paths.append(os.path.dirname(os.path.commonprefix(paths))) - - return list(set(common_paths)) - - -def run(sub_args): - """Initialize, setup, and run the RENEE pipeline. - Calls initialize() to create output directory and copy over pipeline resources, - setup() to create the pipeline config file, dryrun() to ensure their are no issues - before running the pipeline, and finally run() to execute the Snakemake workflow. 
- @param sub_args : - Parsed arguments for run sub-command - """ - # Get PATH to RENEE git repository for copying over pipeline resources - - # hpcname is either biowulf, frce, or blank - hpcname = get_hpcname() - if sub_args.runmode == "init" or not os.path.exists( - os.path.join(sub_args.output, "config.json") - ): - # Initialize working directory, copy over required pipeline resources - input_files = initialize( - sub_args, repo_path=RENEE_PATH, output_path=sub_args.output - ) - - # Step pipeline for execution, create config.json config file from templates - config = setup( - sub_args, - ifiles=input_files, - repo_path=RENEE_PATH, - output_path=sub_args.output, - ) - # load config from existing file - else: - with open(os.path.join(sub_args.output, "config.json"), "r") as config_file: - config = json.load(config_file) - - # ensure the working dir is read/write friendly - scripts_path = os.path.join(sub_args.output, "workflow", "scripts") - os.chmod(scripts_path, 0o755) - - # Optional Step: Dry-run pipeline - if sub_args.dry_run: - dryrun_output = dryrun( - outdir=sub_args.output - ) # python3 returns byte-string representation - print("\nDry-running RENEE pipeline:\n{}".format(dryrun_output.decode("utf-8"))) - # sys.exit(0) # DONT exit now ... 
exit after printing singularity bind paths - - # determine "wait" - wait = "" - if sub_args.wait: - wait = "--wait" - - # Resolve all Singularity Bindpaths - rawdata_bind_paths = config["project"]["datapath"] - - # Get FastQ Screen Database paths - # and other reference genome file paths - fqscreen_cfg1 = config["bin"]["rnaseq"]["tool_parameters"]["FASTQ_SCREEN_CONFIG"] - fqscreen_cfg2 = config["bin"]["rnaseq"]["tool_parameters"]["FASTQ_SCREEN_CONFIG2"] - fq_screen_paths = get_fastq_screen_paths( - [ - os.path.join(sub_args.output, fqscreen_cfg1), - os.path.join(sub_args.output, fqscreen_cfg2), - ] - ) - kraken_db_path = [config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"]] - genome_bind_paths = resolve_additional_bind_paths( - list(config["references"]["rnaseq"].values()) + fq_screen_paths + kraken_db_path - ) - all_bind_paths = "{},{}".format(",".join(genome_bind_paths), rawdata_bind_paths) - - if sub_args.dry_run: # print singularity bind baths and exit - print("\nSingularity Bind Paths:{}".format(all_bind_paths)) - sys.exit(0) - - # Run pipeline - masterjob = orchestrate( - mode=sub_args.mode, - outdir=sub_args.output, - additional_bind_paths=all_bind_paths, - alt_cache=sub_args.singularity_cache, - threads=sub_args.threads, - tmp_dir=get_tmp_dir(sub_args.tmp_dir, sub_args.output), - wait=wait, - hpcname=hpcname, - ) - - # Wait for subprocess to complete, - # this is blocking - masterjob.wait() - - # Relay information about submission - # of the master job or the exit code of the - # pipeline that ran in local mode - if sub_args.mode == "local": - if int(masterjob.returncode) == 0: - print("{} pipeline has successfully completed".format("RENEE")) - else: - fatal( - "{} pipeline failed. 
Please see standard output for more information.".format( - "RENEE" - ) - ) - elif sub_args.mode == "slurm": - jobid = ( - open(os.path.join(sub_args.output, "logfiles", "mjobid.log")).read().strip() - ) - if int(masterjob.returncode) == 0: - print("Successfully submitted master job: ", end="") - else: - fatal( - "Error occurred when submitting the master job. Error code = {}".format( - masterjob.returncode - ) - ) - print(jobid) - - def unlock(sub_args): """Unlocks a previous runs output directory. If snakemake fails ungracefully, it maybe required to unlock the working directory before proceeding again. @@ -1278,7 +201,7 @@ def _sym_refs(input_data, target, make_copy=False): source_name = os.path.abspath(os.path.realpath(file)) canocial_input_paths.append(os.path.dirname(source_name)) - if not exists(target_name): + if not os.path.exists(target_name): if not make_copy: # Create a symlink if it does not already exist # Follow source symlinks to resolve any binding issues @@ -1353,11 +276,11 @@ def configure_build(sub_args, git_repo, output_path): @return additional_bind_paths list[]: List of canonical paths for the list of input files to be added singularity bindpath """ - if not exists(output_path): + if not os.path.exists(output_path): # Pipeline output directory does not exist on filesystem os.makedirs(output_path) - elif exists(output_path) and os.path.isfile(output_path): + elif os.path.exists(output_path) and os.path.isfile(output_path): # Provided Path for pipeline output directory exists as file raise OSError( """\n\tFatal: Failed to create provided pipeline output directory! 
@@ -1493,10 +416,10 @@ def cache(sub_args): images = os.path.join(repo_path, "config", "containers", "images.json") # Create image cache - if not exists(sif_cache): + if not os.path.exists(sif_cache): # Pipeline output directory does not exist on filesystem os.makedirs(sif_cache) - elif exists(sif_cache) and os.path.isfile(sif_cache): + elif os.path.exists(sif_cache) and os.path.isfile(sif_cache): # Provided Path for pipeline output directory exists as file raise OSError( """\n\tFatal: Failed to create provided sif cache directory! @@ -1516,7 +439,7 @@ def cache(sub_args): sif = os.path.join( sif_cache, "{}.sif".format(os.path.basename(uri).replace(":", "_")) ) - if not exists(sif): + if not os.path.exists(sif): # If local sif does not exist on in cache, print warning # and default to pulling from URI in config/containers/images.json print('Image will be pulled from "{}".'.format(uri), file=sys.stderr) @@ -1576,7 +499,7 @@ def genome_options(parser, user_option, prebuilt): # Checks against valid pre-built options # TODO: makes this more dynamic in the future to have it check against # a list of genomes (files) in config/genomes/*.json - elif not user_option in prebuilt: + elif user_option not in prebuilt: # User did NOT provide a valid choice parser.error( """provided invalid choice, '{}', to --genome argument!\n @@ -1844,7 +767,7 @@ def parsed_arguments(name, description): {2}{3}Prebuilt genome+annotation combos:{4} {5} """.format( - "renee", __version__, c.bold, c.url, c.end, list(GENOMES_LIST) + "renee", __version__, c.bold, c.url, c.end, list(get_genomes_list()) ) ) @@ -1859,6 +782,12 @@ def parsed_arguments(name, description): add_help=False, ) + subparser_gui = subparsers.add_parser( + "gui", + help="Launch the RENEE pipeline with a Graphical User Interface (GUI)", + description="", + ) + # Required Arguments # Input FastQ files subparser_run.add_argument( @@ -1884,7 +813,9 @@ def parsed_arguments(name, description): subparser_run.add_argument( "--genome", 
required=True, - type=lambda option: str(genome_options(subparser_run, option, GENOMES_LIST)), + type=lambda option: str( + genome_options(subparser_run, option, get_genomes_list()) + ), help=argparse.SUPPRESS, ) @@ -2178,7 +1109,7 @@ def parsed_arguments(name, description): --output /data/$USER/refs/mm39_M26 \\ --dry-run - # Step 2A.) Build RENEE reference files + # Step 2A.) Build {0} reference files renee build --ref-fa GRCm39.primary_assembly.genome.fa \\ --ref-name mm39 \\ --ref-gtf gencode.vM26.annotation.gtf \\ @@ -2191,7 +1122,7 @@ def parsed_arguments(name, description): {2}{3}Prebuilt genome+annotation combos:{4} {5} """.format( - "renee", __version__, c.bold, c.url, c.end, list(GENOMES_LIST) + "renee", __version__, c.bold, c.url, c.end, list(get_genomes_list()) ) ) @@ -2514,12 +1445,6 @@ def parsed_arguments(name, description): # Add custom help message subparser_cache.add_argument("-h", "--help", action="help", help=argparse.SUPPRESS) - subparser_gui = subparsers.add_parser( - "gui", - help="Launch the RENEE pipeline with a Graphical User Interface (GUI)", - description="", - ) - # Define handlers for each sub-parser subparser_run.set_defaults(func=run) subparser_unlock.set_defaults(func=unlock) diff --git a/src/renee/cache.py b/src/renee/cache.py new file mode 100644 index 0000000..2e9f5c0 --- /dev/null +++ b/src/renee/cache.py @@ -0,0 +1,44 @@ +import json +import os +import sys + + +def image_cache(sub_args, config): + """Adds Docker Image URIs, or SIF paths to config if singularity cache option is provided. + If singularity cache option is provided and a local SIF does not exist, a warning is + displayed and the image will be pulled from URI in 'config/containers/images.json'. 
+ @param sub_args : + Parsed arguments for run sub-command + @params config : + Docker Image config file + @return config : + Updated config dictionary containing user information (username and home directory) + """ + images = os.path.join(sub_args.output, "config", "containers", "images.json") + + # Read in config for docker image uris + with open(images, "r") as fh: + data = json.load(fh) + # Check if local sif exists + for image, uri in data["images"].items(): + if sub_args.sif_cache: + sif = os.path.join( + sub_args.sif_cache, + "{}.sif".format(os.path.basename(uri).replace(":", "_")), + ) + if not os.path.exists(sif): + # If local sif does not exist on in cache, print warning + # and default to pulling from URI in config/containers/images.json + print( + 'Warning: Local image "{}" does not exist in singularity cache'.format( + sif + ), + file=sys.stderr, + ) + else: + # Change pointer to image from Registry URI to local SIF + data["images"][image] = sif + + config.update(data) + + return config diff --git a/src/renee/conditions.py b/src/renee/conditions.py new file mode 100644 index 0000000..021870f --- /dev/null +++ b/src/renee/conditions.py @@ -0,0 +1,25 @@ +import sys + + +def fatal(*message, **kwargs): + """Prints any provided args to standard error + and exits with an exit code of 1. + @param message : + Values printed to standard error + @params kwargs + Key words to modify print function behavior + """ + err(*message, **kwargs) + sys.exit(1) + + +def err(*message, **kwargs): + """Prints any provided args to standard error. + kwargs can be provided to modify print functions + behavior. 
+ @param message : + Values printed to standard error + @params kwargs + Key words to modify print function behavior + """ + print(*message, file=sys.stderr, **kwargs) diff --git a/src/renee/dryrun.py b/src/renee/dryrun.py new file mode 100644 index 0000000..0a05d56 --- /dev/null +++ b/src/renee/dryrun.py @@ -0,0 +1,89 @@ +import datetime +import os +import subprocess +import sys + + +def dryrun( + outdir, + config="config.json", + snakefile=os.path.join("workflow", "Snakefile"), + write_to_file=True, +): + """Dryruns the pipeline to ensure there are no errors prior to running. + @param outdir : + Pipeline output PATH + @return dryrun_output : + Byte string representation of dryrun command + """ + try: + dryrun_output = subprocess.check_output( + [ + "snakemake", + "-npr", + "-s", + str(snakefile), + "--use-singularity", + "--rerun-incomplete", + "--cores", + "4", + "--configfile={}".format(config), + ], + cwd=outdir, + stderr=subprocess.STDOUT, + ) + + except subprocess.CalledProcessError as e: + # Singularity is NOT in $PATH + # Tell user to load both main dependencies to avoid the OSError below + print( + "Are singularity and snakemake in your PATH? Please check before proceeding again!" + ) + sys.exit("{}\n{}".format(e, e.output.decode("utf-8"))) + except OSError as e: + # Catch: OSError: [Errno 2] No such file or directory + # Occurs when command returns a non-zero exit-code + if e.errno == 2 and not exe_in_path("snakemake"): + # Failure caused because snakemake is NOT in $PATH + print( + "\x1b[6;37;41m\nError: Are snakemake AND singularity in your $PATH?\nPlease check before proceeding again!\x1b[0m", + file=sys.stderr, + ) + sys.exit("{}".format(e)) + else: + # Failure caused by unknown cause, raise error + raise e + + if write_to_file: + now = _now() + with open(os.path.join(outdir, "dryrun." 
+ str(now) + ".log"), "w") as outfile: + outfile.write("{}".format(dryrun_output.decode("utf-8"))) + + return dryrun_output + + +def _now(): + ct = datetime.now() + now = ct.strftime("%y%m%d%H%M%S") + return now + + +def exe_in_path(cmd, path=None): + """Checks if an executable is in $PATH + @param cmd : + Name of executable to check + @param path : + Optional list of PATHs to check [default: $PATH] + @return : + True if exe in PATH, False if not in PATH + """ + if path is None: + path = os.environ["PATH"].split(os.pathsep) + + for prefix in path: + filename = os.path.join(prefix, cmd) + executable = os.access(filename, os.X_OK) + is_not_directory = os.path.isfile(filename) + if executable and is_not_directory: + return True + return False diff --git a/src/renee/gui.py b/src/renee/gui.py index 1b94212..01da95b 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -1,130 +1,45 @@ #!/usr/bin/env python3 - - -global DEBUG - -DEBUG = True - +import argparse +import contextlib import glob +import io import os import PySimpleGUI as sg import sys -import stat -import subprocess from tkinter import Tk -import uuid - - -# getting the name of the directory -# where the this file is present. -current = os.path.dirname(os.path.realpath(__file__)) -# Getting the parent directory name -# where the current directory is present. -parent = os.path.dirname(current) +from renee.src.renee.util import ( + get_genomes_dict, + get_tmp_dir, + get_shared_resources_dir, + renee_base, +) +from renee.src.renee.run import run -# adding the parent directory to -# the sys.path. -sys.path.append(parent) -imgdir = os.path.join(parent, "resources", "images") +# TODO: get rid of all the global variables, get values from CLI flags instead +global DEBUG +DEBUG = True +# TODO: let's use a tmp dir and put these files there instead. 
see for inspiration:https://github.com/CCBR/RENEE/blob/16d13dca1d5f0f43c7dfda379efb882a67635d17/tests/test_cache.py#L14-L28 +global FILES_TO_DELETE global RENEEDIR global SIFCACHE global RENEE global RENEEVER -global RANDOMSTR -global FILES2DELETE global HOSTNAME RENEEDIR = os.getenv("RENEEDIR") SIFCACHE = os.getenv("SIFCACHE") RENEEVER = os.getenv("RENEEVER") HOSTNAME = os.getenv("HOSTNAME") -RENNE = os.path.join(RENEEDIR, RENEEVER, "bin", "renee") -RANDOMSTR = str(uuid.uuid4()) -FILES2DELETE = list() +RENNE = renee_base(os.path.join("bin", "renee")) -# sg.SetOptions(button_color=sg.COLOR_SYSTEM_DEFAULT) - - -def version_check(): - # version check - # glob.iglob requires 3.11 for using "include_hidden=True" - MIN_PYTHON = (3, 11) - try: - assert sys.version_info >= MIN_PYTHON - print( - "Python version: {0}.{1}.{2}".format( - sys.version_info.major, sys.version_info.minor, sys.version_info.micro - ) - ) - except AssertionError: - exit( - f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" - ) - - -def copy_to_clipboard(string): - r = Tk() - r.withdraw() - r.clipboard_clear() - r.clipboard_append(string) - r.update() - r.destroy() - - -def get_combos(): - resource_dir = os.path.join(RENEEDIR, "resources") - if not os.path.exists(resource_dir): - sys.exit("ERROR: Folder does not exist : {}".format(resource_dir)) - searchterm = resource_dir + "/**/**/*json" - jsonfiles = glob.glob(searchterm) - if len(jsonfiles) == 0: - sys.exit("ERROR: No Genome+Annotation JSONs found in : {}".format(resource_dir)) - jsons = dict() - for j in jsonfiles: - k = os.path.basename(j) - k = k.replace(".json", "") - jsons[k] = j - return jsons - - -def fixpath(p): - return os.path.abspath(os.path.expanduser(p)) - - -def get_fastqs(inputdir): - inputdir = fixpath(inputdir) - inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") - inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") - inputfastqs.extend(inputfqs) - return inputfastqs - - -def deletefiles(): 
- for f in FILES2DELETE: - if os.path.exists(f): - os.remove(f) - - -def run(cmd, dry=False): - if dry: - cmd += " --dry-run " - runner_file = os.path.join(os.getenv("HOME"), RANDOMSTR + ".renee.runner") - FILES2DELETE.append(runner_file) - with open(runner_file, "w") as runner: - runner.write(cmd) - st = os.stat(runner_file) - os.chmod(runner_file, st.st_mode | stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) - x = subprocess.run(runner_file, capture_output=True, shell=True, text=True) - run_stdout = x.stdout.encode().decode("utf-8") - run_stderr = x.stderr.encode().decode("utf-8") - return run_stdout, run_stderr +FILES_TO_DELETE = list() def launch_gui(sub_args): # get drop down genome+annotation options - jsons = get_combos() + jsons = get_genomes_dict() genome_annotation_combinations = list(jsons.keys()) genome_annotation_combinations.sort() if DEBUG: @@ -132,7 +47,7 @@ def launch_gui(sub_args): if DEBUG: print(genome_annotation_combinations) - logo = sg.Image(os.path.join(imgdir, "CCBRlogo.png")) + logo = sg.Image(renee_base(os.path.join("resources", "CCBRlogo.png"))) # create layout layout = [ [sg.Column([[logo]], justification="center")], @@ -264,22 +179,19 @@ def launch_gui(sub_args): continue # sg.Popup("Output folder exists... this is probably a re-run ... 
is it?",location=(0,500)) genome = jsons[values["--ANNOTATION--"]] - renee_cmd = RENNE + " run " - renee_cmd += " --input " + " ".join(inputfastqs) - renee_cmd += " --output " + values["--OUTDIR--"] - renee_cmd += " --genome " + genome - renee_cmd += " --sif-cache " + SIFCACHE - renee_cmd += " --mode slurm " - # if HOSTNAME != "biowulf.nih.gov": - if HOSTNAME == "fsitgl-head01p.ncifcrf.gov": - renee_cmd += " --tmp-dir /scratch/cluster_scratch/$USER " - renee_cmd += " --shared-resources /mnt/projects/CCBR-Pipelines/pipelines/RENEE/resources/shared_resources " - run_stdout, run_stderr = run(renee_cmd, dry=True) - if DEBUG: - print(run_stdout) - if DEBUG: - print(run_stderr) - allout = "{}\n{}".format(run_stdout, run_stderr) + # create sub args for renee run + run_args = argparse.Namespace( + input=" ".join(inputfastqs), + output=values["--OUTDIR--"], + genome=genome, + sif_cache=SIFCACHE, + mode="slurm", + tmp_dir=get_tmp_dir(), + shared_resources=get_shared_resources_dir(), + dry_run=True, + ) + # execute dry run and capture stdout/stderr + allout = run_in_context(run_args) sg.popup_scrolled( allout, title="Dryrun:STDOUT/STDERR", @@ -287,6 +199,7 @@ def launch_gui(sub_args): location=(0, 500), size=(80, 30), ) + # TODO use a regex to simplify this line if "error" in allout or "Error" in allout or "ERROR" in allout: continue ch = sg.popup_yes_no( @@ -296,12 +209,16 @@ def launch_gui(sub_args): font=("Arial", 12, "bold"), ) if ch == "Yes": - run_stdout, run_stderr = run(renee_cmd, dry=False) - if DEBUG: - print(run_stdout) - if DEBUG: - print(run_stderr) - allout = "{}\n{}".format(run_stdout, run_stderr) + run_args.dry_run = False + # execute live run + allout = run_in_context(run_args) + sg.popup_scrolled( + allout, + title="Dryrun:STDOUT/STDERR", + font=("Monaco", 10), + location=(0, 500), + size=(80, 30), + ) sg.popup_scrolled( allout, title="Slurmrun:STDOUT/STDERR", @@ -323,17 +240,46 @@ def launch_gui(sub_args): continue window.close() - if len(FILES2DELETE) != 
0: - deletefiles() + if len(FILES_TO_DELETE) != 0: + delete_files(FILES_TO_DELETE) + + +def run_in_context(args): + """Execute the run function in a context manager to capture stdout/stderr""" + with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( + io.StringIO() + ) as err_f: + run(args) + allout = out_f.getvalue() + "\n" + err_f.getvalue() + return allout + + +def copy_to_clipboard(string): + r = Tk() + r.withdraw() + r.clipboard_clear() + r.clipboard_append(string) + r.update() + r.destroy() + +def fixpath(p): + return os.path.abspath(os.path.expanduser(p)) + + +def get_fastqs(inputdir): + inputdir = fixpath(inputdir) + inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") + inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") + inputfastqs.extend(inputfqs) + return inputfastqs + + +def delete_files(files): + for f in files: + if os.path.exists(f): + os.remove(f) -# ./renee run \ -# --input ../.tests/*.R?.fastq.gz \ -# --output /data/${USER}/RENEE_testing_230703/RNA_hg38 \ -# --genome /data/CCBR_Pipeliner/Pipelines/RENEE/resources/hg38/30/hg38_30.json \ -# --sif-cache /data/CCBR_Pipeliner/SIFS/ \ -# --mode slurm if __name__ == "__main__": - version_check() launch_gui() diff --git a/src/renee/initialize.py b/src/renee/initialize.py new file mode 100644 index 0000000..a977f07 --- /dev/null +++ b/src/renee/initialize.py @@ -0,0 +1,144 @@ +import os +import re +import sys + +from renee.src.renee.util import ( + _cp_r_safe_, +) + + +def initialize(sub_args, repo_path, output_path): + """Initialize the output directory and copy over required pipeline resources. + If user provides a output directory path that already exists on the filesystem + as a file (small chance of happening but possible), a OSError is raised. If the + output directory PATH already EXISTS, it will not try to create the directory. + If a resource also already exists in the output directory (i.e. output/workflow), + it will not try to copy over that directory. 
In the future, it may be worth adding + an optional cli arg called --force, that can modify this behavior. Returns a list + of renamed FastQ files (i.e. renamed symlinks). + @param sub_args : + Parsed arguments for run sub-command + @param repo_path : + Path to RENEE source code and its templates + @param output_path : + Pipeline output path, created if it does not exist + @return inputs list[]: + List of pipeline's input FastQ files + """ + if not os.path.exists(output_path): + # Pipeline output directory does not exist on filesystem + os.makedirs(output_path) + + elif os.path.exists(output_path) and os.path.isfile(output_path): + # Provided Path for pipeline output directory exists as file + raise OSError( + """\n\tFatal: Failed to create provided pipeline output directory! + User provided --output PATH already exists on the filesystem as a file. + Please run {} again with a different --output PATH. + """.format( + sys.argv[0] + ) + ) + + # Copy over templates and other required resources + _cp_r_safe_( + source=repo_path, + target=output_path, + resources=["workflow", "resources", "config"], + ) + + # Create renamed symlinks to rawdata + inputs = _sym_safe_(input_data=sub_args.input, target=output_path) + + return inputs + + +def _sym_safe_(input_data, target): + """Creates re-named symlinks for each FastQ file provided + as input. If a symlink already exists, it will not try to create a new symlink. + If relative source PATH is provided, it will be converted to an absolute PATH.
+ @param input_data ]>: + List of input files to symlink to target location + @param target : + Target path to copy templates and required resources + @return input_fastqs list[]: + List of renamed input FastQs + """ + input_fastqs = [] # store renamed fastq file names + for file in input_data: + filename = os.path.basename(file) + renamed = os.path.join(target, rename(filename)) + input_fastqs.append(renamed) + + if not os.path.exists(renamed): + # Create a symlink if it does not already exist + # Follow source symlinks to resolve any binding issues + os.symlink(os.path.abspath(os.path.realpath(file)), renamed) + + return input_fastqs + + +def rename(filename): + """Dynamically renames FastQ file to have one of the following extensions: *.R1.fastq.gz, *.R2.fastq.gz + To automatically rename the fastq files, a few assumptions are made. If the extension of the + FastQ file cannot be inferred, an exception is raised telling the user to fix the filename + of the fastq files. + @param filename : + Original name of file to be renamed + @return filename : + A renamed FastQ filename + """ + # Covers common extensions from SF, SRA, EBI, TCGA, and external sequencing providers + # key = regex to match string and value = how it will be renamed + extensions = { + # Matches: _R[12]_fastq.gz, _R[12].fastq.gz, _R[12]_fq.gz, etc. + ".R1.f(ast)?q.gz$": ".R1.fastq.gz", + ".R2.f(ast)?q.gz$": ".R2.fastq.gz", + # Matches: _R[12]_001_fastq_gz, _R[12].001.fastq.gz, _R[12]_001.fq.gz, etc. + # Capture lane information as named group + ".R1.(?P...).f(ast)?q.gz$": ".R1.fastq.gz", + ".R2.(?P...).f(ast)?q.gz$": ".R2.fastq.gz", + # Matches: _[12].fastq.gz, _[12].fq.gz, _[12]_fastq_gz, etc. 
+ "_1.f(ast)?q.gz$": ".R1.fastq.gz", + "_2.f(ast)?q.gz$": ".R2.fastq.gz", + } + + if filename.endswith(".R1.fastq.gz") or filename.endswith(".R2.fastq.gz"): + # Filename is already in the correct format + return filename + + converted = False + for regex, new_ext in extensions.items(): + matched = re.search(regex, filename) + if matched: + # regex matches with a pattern in extensions + converted = True + # Try to get substring for named group lane, retain this in new file extension + # Come back to this later, I am not sure if this is necessary + # That string maybe static (i.e. always the same) + # https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/NamingConvention_FASTQ-files-swBS.htm# + try: + new_ext = "_{}{}".format(matched.group("lane"), new_ext) + except IndexError: + pass # Does not contain the named group lane + + filename = re.sub(regex, new_ext, filename) + break # only rename once + + if not converted: + raise NameError( + """\n\tFatal: Failed to rename provided input '{}'! + Cannot determine the extension of the user provided input file. + Please rename the file list above before trying again. + Here is example of acceptable input file extensions: + sampleName.R1.fastq.gz sampleName.R2.fastq.gz + sampleName_R1_001.fastq.gz sampleName_R2_001.fastq.gz + sampleName_1.fastq.gz sampleName_2.fastq.gz + Please also check that your input files are gzipped? + If they are not, please gzip them before proceeding again. 
+ """.format( + filename + ) + ) + + return filename diff --git a/src/renee/run.py b/src/renee/run.py new file mode 100644 index 0000000..67c37b0 --- /dev/null +++ b/src/renee/run.py @@ -0,0 +1,202 @@ +import json +import os +import pathlib +import sys + +from renee.src.renee.util import renee_base, get_hpcname, get_tmp_dir, orchestrate +from renee.src.renee.conditions import fatal +from renee.src.renee.initialize import initialize +from renee.src.renee.setup import setup +from renee.src.renee.dryrun import dryrun + + +def run(sub_args): + """Initialize, setup, and run the RENEE pipeline. + Calls initialize() to create output directory and copy over pipeline resources, + setup() to create the pipeline config file, dryrun() to ensure their are no issues + before running the pipeline, and finally run() to execute the Snakemake workflow. + @param sub_args : + Parsed arguments for run sub-command + """ + # Get PATH to RENEE git repository for copying over pipeline resources + + # hpcname is either biowulf, frce, or blank + hpcname = get_hpcname() + if sub_args.runmode == "init" or not os.path.exists( + os.path.join(sub_args.output, "config.json") + ): + # Initialize working directory, copy over required pipeline resources + input_files = initialize( + sub_args, repo_path=renee_base(), output_path=sub_args.output + ) + + # Step pipeline for execution, create config.json config file from templates + config = setup( + sub_args, + ifiles=input_files, + repo_path=renee_base(), + output_path=sub_args.output, + ) + # load config from existing file + else: + with open(os.path.join(sub_args.output, "config.json"), "r") as config_file: + config = json.load(config_file) + + # ensure the working dir is read/write friendly + scripts_path = os.path.join(sub_args.output, "workflow", "scripts") + os.chmod(scripts_path, 0o755) + + # Optional Step: Dry-run pipeline + if sub_args.dry_run: + dryrun_output = dryrun( + outdir=sub_args.output + ) # python3 returns byte-string representation 
+ print("\nDry-running RENEE pipeline:\n{}".format(dryrun_output.decode("utf-8"))) + # sys.exit(0) # DONT exit now ... exit after printing singularity bind paths + + # determine "wait" + wait = "" + if sub_args.wait: + wait = "--wait" + + # Resolve all Singularity Bindpaths + rawdata_bind_paths = config["project"]["datapath"] + + # Get FastQ Screen Database paths + # and other reference genome file paths + fqscreen_cfg1 = config["bin"]["rnaseq"]["tool_parameters"]["FASTQ_SCREEN_CONFIG"] + fqscreen_cfg2 = config["bin"]["rnaseq"]["tool_parameters"]["FASTQ_SCREEN_CONFIG2"] + fq_screen_paths = get_fastq_screen_paths( + [ + os.path.join(sub_args.output, fqscreen_cfg1), + os.path.join(sub_args.output, fqscreen_cfg2), + ] + ) + kraken_db_path = [config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"]] + genome_bind_paths = resolve_additional_bind_paths( + list(config["references"]["rnaseq"].values()) + fq_screen_paths + kraken_db_path + ) + all_bind_paths = "{},{}".format(",".join(genome_bind_paths), rawdata_bind_paths) + + if sub_args.dry_run: # print singularity bind baths and exit + print("\nSingularity Bind Paths:{}".format(all_bind_paths)) + sys.exit(0) + + # Run pipeline + masterjob = orchestrate( + mode=sub_args.mode, + outdir=sub_args.output, + additional_bind_paths=all_bind_paths, + alt_cache=sub_args.singularity_cache, + threads=sub_args.threads, + tmp_dir=get_tmp_dir(sub_args.tmp_dir, sub_args.output), + wait=wait, + hpcname=hpcname, + ) + + # Wait for subprocess to complete, + # this is blocking + masterjob.wait() + + # Relay information about submission + # of the master job or the exit code of the + # pipeline that ran in local mode + if sub_args.mode == "local": + if int(masterjob.returncode) == 0: + print("{} pipeline has successfully completed".format("RENEE")) + else: + fatal( + "{} pipeline failed. 
Please see standard output for more information.".format( + "RENEE" + ) + ) + elif sub_args.mode == "slurm": + jobid = ( + open(os.path.join(sub_args.output, "logfiles", "mjobid.log")).read().strip() + ) + if int(masterjob.returncode) == 0: + print("Successfully submitted master job: ", end="") + else: + fatal( + "Error occurred when submitting the master job. Error code = {}".format( + masterjob.returncode + ) + ) + print(jobid) + + +def resolve_additional_bind_paths(search_paths): + """Finds additional singularity bind paths from a list of random paths. Paths are + indexed with a composite key containing the first two directories of an absolute + file path to avoid issues related to shared names across the /gpfs shared network + filesystem. For each indexed list of file paths, a common path is found. Assumes + that the paths provided are absolute paths, the renee build sub command creates + resource file index with absolute filenames. + @param search_paths list[]: + List of absolute file paths to find common bind paths from + @return common_paths list[]: + Returns a list of common shared file paths to create additional singularity bind paths + """ + common_paths = [] + indexed_paths = {} + + for ref in search_paths: + # Skip over resources with remote URI and + # skip over strings that are not file PATHS as + # RENEE build creates absolute resource PATHS + if ( + ref.lower().startswith("sftp://") + or ref.lower().startswith("s3://") + or ref.lower().startswith("gs://") + or not ref.lower().startswith(os.sep) + ): + continue + + # Break up path into directory tokens + for r in [ + ref, + str(pathlib.Path(ref).resolve()), + ]: # taking care of paths which are symlinks!
+ path_list = os.path.abspath(r).split(os.sep) + + try: # Create composite index from first two directories + # Avoids issues created by shared /gpfs/ PATHS + index = path_list[1:3] + index = tuple(index) + except IndexError: + index = path_list[1] # ref startswith / + if index not in indexed_paths: + indexed_paths[index] = [] + # Create an INDEX to find common PATHS for each root child directory + # like /scratch or /data. This prevents issues when trying to find the + # common path between these two different directories (resolves to /) + indexed_paths[index].append(str(os.sep).join(path_list)) + + for index, paths in indexed_paths.items(): + # Find common paths for each path index + common_paths.append(os.path.dirname(os.path.commonprefix(paths))) + + return list(set(common_paths)) + + +def get_fastq_screen_paths(fastq_screen_confs, match="DATABASE", file_index=-1): + """Parses fastq_screen.conf files to get the paths of each fastq_screen database. + This path contains bowtie2 indices for reference genome to screen against. + The paths are added as singularity bind points. 
+ @param fastq_screen_confs list[]: + Name of fastq_screen config files to parse + @param match : + Keyword to indicate a line match [default: 'DATABASE'] + @param file_index : + Index of line line containing the fastq_screen database path + @return list[]: + Returns a list of fastq_screen database paths + """ + databases = [] + for file in fastq_screen_confs: + with open(file, "r") as fh: + for line in fh: + if line.startswith(match): + db_path = line.strip().split()[file_index] + databases.append(db_path) + return databases diff --git a/src/renee/setup.py b/src/renee/setup.py new file mode 100644 index 0000000..253166e --- /dev/null +++ b/src/renee/setup.py @@ -0,0 +1,329 @@ +import os +import json +import re +import subprocess +import sys + +from renee.src.renee.util import ( + get_hpcname, + get_version, + get_tmp_dir, +) +from renee.src.renee.cache import image_cache + + +def setup(sub_args, ifiles, repo_path, output_path): + """Setup the pipeline for execution and creates config file from templates + @param sub_args : + Parsed arguments for run sub-command + @param repo_path : + Path to RENEE source code and its templates + @param output_path : + Pipeline output path, created if it does not exist + @return config : + Config dictionary containing metadata to run the pipeline + @return hpcname : + """ + # Resolves PATH to template for genomic reference files to select from a + # bundled reference genome or a user generated reference genome built via + # renee build subcommand + hpcname = get_hpcname() + if hpcname == "biowulf": + print("Thank you for running RENEE on BIOWULF!") + genome_config = os.path.join( + output_path, "config", "genomes", hpcname, sub_args.genome + ".json" + ) + elif hpcname == "frce": + print("Thank you for running RENEE on FRCE!") + genome_config = os.path.join( + output_path, "config", "genomes", hpcname, sub_args.genome + ".json" + ) + else: + genome_config = os.path.join( + output_path, "config", "genomes", sub_args.genome + ".json" 
+ ) + if sub_args.genome.endswith(".json"): + # Provided a custom reference genome generated by renee build + genome_config = os.path.abspath(sub_args.genome) + + required = { + # Template for project-level information + "project": os.path.join(output_path, "config", "templates", "project.json"), + # Template for genomic reference files + # User provided argument --genome is used to select the template + "genome": genome_config, + # Template for tool information + "tools": os.path.join(output_path, "config", "templates", "tools.json"), + } + + # Global config file for pipeline, config.json + config = join_jsons(required.values()) # uses templates in the renee repo + # Update cluster-specific paths for fastq screen & kraken db + if hpcname == "biowulf" or hpcname == "frce": + db_json_filename = os.path.join( + output_path, "config", "templates", f"dbs_{hpcname}.json" + ) + with open( + os.path.join(os.path.dirname(os.path.abspath(__file__)), db_json_filename), + "r", + ) as json_file: + config["bin"]["rnaseq"]["tool_parameters"].update(json.load(json_file)) + + config = add_user_information(config) + config = add_rawdata_information(sub_args, config, ifiles) + + # Resolves if an image needs to be pulled from an OCI registry or + # a local SIF generated from the renee cache subcommand exists + config = image_cache(sub_args, config) + + # Add other cli collected info + config["project"]["annotation"] = sub_args.genome + config["project"]["version"] = get_version() + config["project"]["pipelinehome"] = os.path.dirname(__file__) + config["project"]["workpath"] = os.path.abspath(sub_args.output) + genome_annotation = sub_args.genome + config["project"]["organism"] = genome_annotation.split("_")[0] + + # Add optional cli workflow steps + config["options"] = {} + config["options"]["star_2_pass_basic"] = sub_args.star_2_pass_basic + config["options"]["small_rna"] = sub_args.small_rna + config["options"]["tmp_dir"] = get_tmp_dir(sub_args.tmp_dir, output_path) + 
config["options"]["shared_resources"] = sub_args.shared_resources + if sub_args.wait: + config["options"]["wait"] = "True" + else: + config["options"]["wait"] = "False" + if sub_args.create_nidap_folder: + config["options"]["create_nidap_folder"] = "True" + else: + config["options"]["create_nidap_folder"] = "False" + + # Get latest git commit hash + git_hash = get_repo_git_commit_hash(repo_path) + config["project"]["git_commit_hash"] = git_hash + + if sub_args.shared_resources: + # Update paths to shared resources directory + config["bin"]["rnaseq"]["tool_parameters"][ + "FASTQ_SCREEN_CONFIG" + ] = os.path.join( + sub_args.shared_resources, "fastq_screen_db", "fastq_screen.conf" + ) + config["bin"]["rnaseq"]["tool_parameters"][ + "FASTQ_SCREEN_CONFIG2" + ] = os.path.join( + sub_args.shared_resources, "fastq_screen_db", "fastq_screen_2.conf" + ) + config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"] = os.path.join( + sub_args.shared_resources, "20180907_standard_kraken2" + ) + + # Save config to output directory + print( + "\nGenerating config file in '{}'... ".format( + os.path.join(output_path, "config.json") + ), + end="", + ) + with open(os.path.join(output_path, "config.json"), "w") as fh: + json.dump(config, fh, indent=4, sort_keys=True) + print("Done!") + + return config + + +def add_user_information(config): + """Adds username and user's home directory to config. 
+ @params config : + Config dictionary containing metadata to run pipeline + @return config : + Updated config dictionary containing user information (username and home directory) + """ + # Get PATH to user's home directory + # Method is portable across unix-like OS and Windows + home = os.path.expanduser("~") + + # Get username from home directory PATH + username = os.path.split(home)[-1] + + # Update config with home directory and username + config["project"]["userhome"] = home + config["project"]["username"] = username + + return config + + +def add_rawdata_information(sub_args, config, ifiles): + """Adds information about rawdata provided to pipeline. + Determines whether the dataset is paired-end or single-end and finds the set of all + rawdata directories (needed for -B option when running singularity). If a user provides + paired-end data, checks to see if both mates (R1 and R2) are present for each sample. + @param sub_args : + Parsed arguments for run sub-command + @params ifiles list[]: + List containing pipeline input files (renamed symlinks) + @params config : + Config dictionary containing metadata to run pipeline + @return config : + Updated config dictionary containing user information (username and home directory) + """ + # Determine whether dataset is paired-end or single-ends + # Updates config['project']['nends']: 1 = single-end, 2 = paired-end + nends = get_nends(ifiles) # Checks PE data for both mates (R1 and R2) + config["project"]["nends"] = nends + + # Finds the set of rawdata directories to bind + rawdata_paths = get_rawdata_bind_paths(input_files=sub_args.input) + config["project"]["datapath"] = ",".join(rawdata_paths) + + # Add each sample's basename, label and group info + config = add_sample_metadata(input_files=ifiles, config=config) + + return config + + +def get_nends(ifiles): + """Determines whether the dataset is paired-end or single-end. + If paired-end data, checks to see if both mates (R1 and R2) are present for each sample. 
+ If single-end, nends is set to 1. Else if paired-end, nends is set to 2. + @params ifiles list[]: + List containing pipeline input files (renamed symlinks) + @return nends_status : + Integer reflecting nends status: 1 = se, 2 = pe + """ + # Determine if dataset contains paired-end data + paired_end = False + nends_status = 1 + for file in ifiles: + if file.endswith(".R2.fastq.gz"): + paired_end = True + nends_status = 2 + break # dataset is paired-end + + # Check to see if both mates (R1 and R2) are present paired-end data + if paired_end: + nends = {} # keep count of R1 and R2 for each sample + for file in ifiles: + # Split sample name on file extension + sample = re.split("\.R[12]\.fastq\.gz", os.path.basename(file))[0] + if sample not in nends: + nends[sample] = 0 + + nends[sample] += 1 + + # Check if samples contain both read mates + missing_mates = [sample for sample, count in nends.items() if count == 1] + if missing_mates: + # Missing an R1 or R2 for a provided input sample + raise NameError( + """\n\tFatal: Detected pair-end data but user failed to provide + both mates (R1 and R2) for the following samples:\n\t\t{}\n + Please check that the basename for each sample is consistent across mates. + Here is an example of a consistent basename across mates: + consistent_basename.R1.fastq.gz + consistent_basename.R2.fastq.gz + + Please do not run the pipeline with a mixture of single-end and paired-end + samples. This feature is currently not supported within {}, and it is + not recommended either. If this is a priority for your project, please run + paired-end samples and single-end samples separately (in two separate output directories). + If you feel like this functionality should exist, feel free to open an issue on Github. + """.format( + missing_mates, sys.argv[0] + ) + ) + + return nends_status + + +def get_rawdata_bind_paths(input_files): + """ + Gets rawdata bind paths of user provided fastq files. 
+ @params input_files list[]: + List containing user-provided input fastq files + @return bindpaths : + Set of rawdata bind paths + """ + bindpaths = [] + for file in input_files: + # Get directory of input file + rawdata_src_path = os.path.dirname(os.path.abspath(os.path.realpath(file))) + if rawdata_src_path not in bindpaths: + bindpaths.append(rawdata_src_path) + + return bindpaths + + +def add_sample_metadata(input_files, config, group=None): + """Adds sample metadata such as sample basename, label, and group information. + If sample sheet is provided, it will default to using information in that file. + If no sample sheet is provided, it will only add sample basenames and labels. + @params input_files list[]: + List containing pipeline input fastq files + @params config : + Config dictionary containing metadata to run pipeline + @params group : + Sample sheet containing basename, group, and label for each sample + @return config : + Updated config with basenames, labels, and groups (if provided) + """ + # TODO: Add functionality for basecase when user has samplesheet + added = [] + for file in input_files: + # Split sample name on file extension + sample = re.split("\.R[12]\.fastq\.gz", os.path.basename(file))[0] + if sample not in added: + # Only add PE sample information once + added.append(sample) + config["project"]["groups"]["rsamps"].append(sample) + config["project"]["groups"]["rgroups"].append(sample) + config["project"]["groups"]["rlabels"].append(sample) + + return config + + +def join_jsons(templates): + """Joins multiple JSON files to into one data structure + Used to join multiple template JSON files to create a global config dictionary. 
+ @params templates : + List of template JSON files to join together + @return aggregated : + Dictionary containing the contents of all the input JSON files + """ + # Get absolute PATH to templates in renee git repo + repo_path = os.path.dirname(os.path.abspath(__file__)) + aggregated = {} + + for file in templates: + with open(os.path.join(repo_path, file), "r") as fh: + aggregated.update(json.load(fh)) + + return aggregated + + +def get_repo_git_commit_hash(repo_path): + """Gets the git commit hash of the RENEE repo. + @param repo_path : + Path to RENEE git repo + @return githash : + Latest git commit hash + """ + try: + githash = ( + subprocess.check_output( + ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT, cwd=repo_path + ) + .strip() + .decode("utf-8") + ) + # Typecast to fix python3 TypeError (Object of type bytes is not JSON serializable) + # subprocess.check_output() returns a byte string + githash = str(githash) + except Exception as e: + # Github releases are missing the .git directory, + # meaning you cannot get a commit hash, set the + # commit hash to indicate its from a GH release + githash = "github_release" + + return githash diff --git a/src/renee/util.py b/src/renee/util.py new file mode 100644 index 0000000..ad708b5 --- /dev/null +++ b/src/renee/util.py @@ -0,0 +1,329 @@ +import datetime +import glob +import os +import subprocess +import shutil +import sys +import warnings + + +def renee_base(rel_path=""): + """Get the absolute path to a file in the RENEE repository + @return abs_path + """ + basedir = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + ) + return os.path.join(basedir, rel_path) + + +def get_version(): + """Get the current RENEE version + @return version + """ + with open(renee_base("VERSION"), "r") as vfile: + version = f"v{vfile.read().strip()}" + return version + + +def scontrol_show(): + """Run scontrol show config and parse the output as a dictionary + @return scontrol_dict : + """ + 
scontrol_dict = dict() + scontrol_out = subprocess.run( + "scontrol show config", shell=True, capture_output=True, text=True + ).stdout + if len(scontrol_out) > 0: + for line in scontrol_out.split("\n"): + line_split = line.split("=") + if len(line_split) > 1: + scontrol_dict[line_split[0].strip()] = line_split[1].strip() + return scontrol_dict + + +def get_hpcname(): + """Get the HPC name (biowulf, frce, or an empty string) + @return hpcname + """ + scontrol_out = scontrol_show() + hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" + if hpc == "fnlcr": + hpc = "frce" + return hpc + + +def get_tmp_dir(tmp_dir, outdir, hpc=get_hpcname()): + """Get default temporary directory for biowulf and frce. Allow user override.""" + if not tmp_dir: + if hpc == "biowulf": + tmp_dir = "/lscratch/$SLURM_JOBID" + elif hpc == "frce": + tmp_dir = outdir + else: + tmp_dir = None + return tmp_dir + + +def get_shared_resources_dir(shared_dir, outdir, hpc=get_hpcname()): + """Get default shared resources directory for biowulf and frce. Allow user override.""" + if not shared_dir: + if hpc == "biowulf": + shared_dir = ( + "/data/CCBR_Pipeliner/Pipelines/RENEE/resources/shared_resources" + ) + elif hpc == "frce": + shared_dir = "/mnt/projects/CCBR-Pipelines/pipelines/RENEE/resources/shared_resources" + return shared_dir + + +def get_genomes_list( + hpcname=get_hpcname(), +): # TODO call get_genomes_dict and extract list; only warn if no genomes found + """Get list of genome annotations available for the current platform + @return genomes_list + """ + genome_config_dir = renee_base(os.path.join("config", "genomes", hpcname)) + json_files = glob.glob(genome_config_dir + "/*.json") + if not json_files: + warnings.warn( + f"WARNING: No Genome Annotation JSONs found in {genome_config_dir}. 
Please specify a custom genome json file with `--genome`" + ) + genomes = [os.path.basename(file).replace(".json", "") for file in json_files] + return sorted(genomes) + + +def get_genomes_dict( + hpcname=get_hpcname(), +): # TODO option to either warn or error if genomes not found + """Get dictionary of genome annotation versions and the paths to the corresponding JSON files + @return genomes_dict { genome_name: json_file_path } + """ + genomes_dir = renee_base(os.path.join("config", "genomes", hpcname)) + if not os.path.exists(genomes_dir): + raise FileNotFoundError(f"ERROR: Folder does not exist : {genomes_dir}") + search_term = genomes_dir + "/*.json" + json_files = glob.glob(search_term) + if len(json_files) == 0: + raise FileNotFoundError( + f"ERROR: No Genome+Annotation JSONs found in : {genomes_dir}" + ) + genomes_dict = { + os.path.basename(json_file).replace(".json", ""): json_file + for json_file in json_files + } + return genomes_dict + + +def check_python_version(): + # version check + # glob.iglob requires 3.11 for using "include_hidden=True" + MIN_PYTHON = (3, 11) + try: + assert sys.version_info >= MIN_PYTHON + print( + "Python version: {0}.{1}.{2}".format( + sys.version_info.major, sys.version_info.minor, sys.version_info.micro + ) + ) + except AssertionError: + exit( + f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" + ) + + +def _cp_r_safe_( + source, target, resources=["workflow", "resources", "config"], safe_mode=True +): + """Private function: Given a list paths it will recursively copy each to the + target location. If a target path already exists, it will not over-write the + existing paths data when `safe_mode` is on. + @param resources : + List of paths to copy over to target location. 
+ Default: ["workflow", "resources", "config"] + @params source : + Add a prefix PATH to each resource + @param target : + Target path to copy templates and required resources (aka destination) + @param safe_mode : + Only copy the resources to the target path + if they do not exist in the target path (default: True) + """ + for resource in resources: + destination = os.path.join(target, resource) + if os.path.exists(destination) and safe_mode: + print(f"🚫 path exists and `safe_mode` is ON, not copying: {destination}") + else: + # Required resources do not exist, or safe mode is off + shutil.copytree( + os.path.join(source, resource), destination, dirs_exist_ok=not safe_mode + ) + + +def orchestrate( + mode, + outdir, + additional_bind_paths, + alt_cache, + threads=2, + submission_script="runner", + masterjob="pl:renee", + tmp_dir=None, + wait="", + hpcname="", +): + """Runs RENEE pipeline via selected executor: local or slurm. + If 'local' is selected, the pipeline is executed locally on a compute node/instance. + If 'slurm' is selected, jobs will be submitted to the cluster using SLURM job scheduler. + Support for additional job schedulers (i.e. PBS, SGE, LSF) may be added in the future. + @param outdir : + Pipeline output PATH + @param mode : + Execution method or mode: + local runs serially a compute instance without submitting to the cluster. + slurm will submit jobs to the cluster using the SLURM job scheduler. + @param additional_bind_paths : + Additional paths to bind to container filesystem (i.e. 
input file paths) + @param alt_cache : + Alternative singularity cache location + @param threads : + Number of threads to use for local execution method + @param submission_script : + Path to master jobs submission script: + renee run = /path/to/output/resources/runner + renee build = /path/to/output/resources/builder + @param masterjob : + Name of the master job + @param tmp_dir : + Absolute Path to temp dir for compute node + @param wait : + "--wait" to wait for master job to finish. This waits when pipeline is called via NIDAP API + @param hpcname : + "biowulf" if run on biowulf, "frce" if run on frce, blank otherwise. hpcname is determined in setup() function + @return masterjob : + """ + # Add additional singularity bind PATHs + # to mount the local filesystem to the + # containers filesystem, NOTE: these + # PATHs must be an absolute PATHs + outdir = os.path.abspath(outdir) + # Add any default PATHs to bind to + # the container's filesystem, like + # tmp directories, /lscratch + addpaths = [] + # set tmp_dir depending on hpc + tmp_dir = get_tmp_dir(tmp_dir, outdir) + temp = os.path.dirname(tmp_dir.rstrip("/")) + if temp == os.sep: + temp = tmp_dir.rstrip("/") + if outdir not in additional_bind_paths.split(","): + addpaths.append(outdir) + if temp not in additional_bind_paths.split(","): + addpaths.append(temp) + bindpaths = ",".join(addpaths) + + # Set ENV variable 'SINGULARITY_CACHEDIR' + # to output directory + my_env = {} + my_env.update(os.environ) + cache = os.path.join(outdir, ".singularity") + my_env["SINGULARITY_CACHEDIR"] = cache + + if alt_cache: + # Override the pipeline's default cache location + my_env["SINGULARITY_CACHEDIR"] = alt_cache + cache = alt_cache + + if additional_bind_paths: + # Add Bind PATHs for outdir and tmp dir + if bindpaths: + bindpaths = ",{}".format(bindpaths) + bindpaths = "{}{}".format(additional_bind_paths, bindpaths) + + if not os.path.exists(os.path.join(outdir, "logfiles")): + # Create directory for logfiles + 
os.makedirs(os.path.join(outdir, "logfiles")) + + if os.path.exists(os.path.join(outdir, "logfiles", "snakemake.log")): + mtime = _get_file_mtime(os.path.join(outdir, "logfiles", "snakemake.log")) + newname = os.path.join(outdir, "logfiles", "snakemake." + str(mtime) + ".log") + os.rename(os.path.join(outdir, "logfiles", "snakemake.log"), newname) + + # Create .singularity directory for installations of snakemake + # without setuid which create a sandbox in the SINGULARITY_CACHEDIR + if not os.path.exists(cache): + # Create directory for sandbox and image layers + os.makedirs(cache) + + # Run on compute node or instance without submitting jobs to a scheduler + if mode == "local": + # Run RENEE: instantiate main/master process + # Look into later: it maybe worth replacing Popen subprocess with a direct + # snakemake API call: https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html + # Create log file for pipeline + logfh = open(os.path.join(outdir, "logfiles", "snakemake.log"), "w") + masterjob = subprocess.Popen( + [ + "snakemake", + "-pr", + "--use-singularity", + "--singularity-args", + "'-B {}'".format(bindpaths), + "--cores", + str(threads), + "--configfile=config.json", + ], + cwd=outdir, + env=my_env, + ) + + # Submitting jobs to cluster via SLURM's job scheduler + elif mode == "slurm": + # Run RENEE: instantiate main/master process + # Look into later: it maybe worth replacing Popen subprocess with a direct + # snakemake API call: https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html + # snakemake --latency-wait 120 -s $R/Snakefile -d $R --printshellcmds + # --cluster-config $R/cluster.json --keep-going --restart-times 3 + # --cluster "sbatch --gres {cluster.gres} --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name={params.rname}" + # -j 500 --rerun-incomplete --stats $R/Reports/initialqc.stats -T + # 2>&1| tee -a $R/Reports/snakemake.log + + # Create log file for master 
job information + logfh = open(os.path.join(outdir, "logfiles", "master.log"), "w") + # submission_script for renee run is /path/to/output/resources/runner + # submission_script for renee build is /path/to/output/resources/builder + cmdlist = [ + str(os.path.join(outdir, "resources", str(submission_script))), + mode, + "-j", + str(masterjob), + "-b", + str(bindpaths), + "-o", + str(outdir), + "-c", + str(cache), + "-t", + str(tmp_dir), + ] + if str(wait) == "--wait": + cmdlist.append("-w") + if str(hpcname) != "": + cmdlist.append("-n") + cmdlist.append(hpcname) + else: + cmdlist.append("-n") + cmdlist.append("unknown") + + print(" ".join(cmdlist)) + masterjob = subprocess.Popen( + cmdlist, cwd=outdir, stderr=subprocess.STDOUT, stdout=logfh, env=my_env + ) + + return masterjob + + +def _get_file_mtime(f): + timestamp = datetime.fromtimestamp(os.path.getmtime(os.path.abspath(f))) + mtime = timestamp.strftime("%y%m%d%H%M%S") + return mtime diff --git a/tests/test_build.py b/tests/test_build.py index 4bfeaf5..1a291f6 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -4,7 +4,7 @@ import pathlib import tempfile -from renee.src.renee.__main__ import _cp_r_safe_ +from renee.src.renee.__main__ import build renee_build = ( "src/renee/__main__.py build " @@ -14,35 +14,3 @@ "--ref-gtf .tests/KO_S3.R1.fastq.gz " "--gtf-ver 0 " ) -RENEE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - -def test_cp_safe(): - with tempfile.TemporaryDirectory() as tmp_dir: - outdir = os.path.join(tmp_dir, "testout") - os.makedirs(os.path.join(outdir, "config")) - pathlib.Path(os.path.join(outdir, "config", "tmp.txt")).touch() - with contextlib.redirect_stdout(io.StringIO()) as stdout: - _cp_r_safe_( - source=RENEE_PATH, - target=outdir, - resources=["config"], - safe_mode=True, - ) - assert "path exists and `safe_mode` is ON, not copying" in stdout.getvalue() - - -def test_cp_unsafe(): - with tempfile.TemporaryDirectory() as tmp_dir: - outdir = 
def test_renee_base():
    """renee_base() should resolve paths relative to an existing repo root."""
    bin_path = renee_base(os.path.join("bin", "renee"))
    assert bin_path.endswith("/bin/renee")
    assert os.path.exists(bin_path)


def test_cp_safe():
    """With safe_mode on, _cp_r_safe_ must skip existing targets and say so."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "testout")
        config_dir = os.path.join(target, "config")
        os.makedirs(config_dir)
        pathlib.Path(config_dir, "tmp.txt").touch()
        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            _cp_r_safe_(
                source=renee_base(),
                target=target,
                resources=["config"],
                safe_mode=True,
            )
        assert "path exists and `safe_mode` is ON, not copying" in captured.getvalue()


def test_cp_unsafe():
    """With safe_mode off, _cp_r_safe_ must copy silently into existing targets."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "testout")
        config_dir = os.path.join(target, "config")
        os.makedirs(config_dir)
        pathlib.Path(config_dir, "tmp.txt").touch()
        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            _cp_r_safe_(
                source=renee_base(),
                target=target,
                resources=["config"],
                safe_mode=False,
            )
        assert not captured.getvalue()
        assert "config.yaml" in os.listdir(config_dir)
#!/usr/bin/env python
"""Entry point that makes the renee CLI usable straight from a clone."""
import os
import re
import sys

# Put the package sources on sys.path so the CLI works out-of-the-box,
# without requiring `pip install` first.
SCRIPT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src", "renee")
sys.path.append(SCRIPT_DIR)
from src.renee.__main__ import main

if __name__ == "__main__":
    # Mirrors the console-script shim that `pip install` would generate.
    sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0])
    sys.exit(main())
dryrun +from .gui import launch_gui +from .conditions import fatal +from .util import ( get_hpcname, get_tmp_dir, get_genomes_list, diff --git a/src/renee/gui.py b/src/renee/gui.py index 01da95b..701b1c2 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -8,13 +8,13 @@ import sys from tkinter import Tk -from renee.src.renee.util import ( +from .util import ( get_genomes_dict, get_tmp_dir, get_shared_resources_dir, renee_base, ) -from renee.src.renee.run import run +from .run import run # TODO: get rid of all the global variables, get values from CLI flags instead global DEBUG diff --git a/src/renee/initialize.py b/src/renee/initialize.py index a977f07..75d2207 100644 --- a/src/renee/initialize.py +++ b/src/renee/initialize.py @@ -2,7 +2,7 @@ import re import sys -from renee.src.renee.util import ( +from .util import ( _cp_r_safe_, ) diff --git a/src/renee/run.py b/src/renee/run.py index 67c37b0..3dd35c1 100644 --- a/src/renee/run.py +++ b/src/renee/run.py @@ -3,11 +3,11 @@ import pathlib import sys -from renee.src.renee.util import renee_base, get_hpcname, get_tmp_dir, orchestrate -from renee.src.renee.conditions import fatal -from renee.src.renee.initialize import initialize -from renee.src.renee.setup import setup -from renee.src.renee.dryrun import dryrun +from .util import renee_base, get_hpcname, get_tmp_dir, orchestrate +from .conditions import fatal +from .initialize import initialize +from .setup import setup +from .dryrun import dryrun def run(sub_args): diff --git a/src/renee/setup.py b/src/renee/setup.py index 253166e..9130ae8 100644 --- a/src/renee/setup.py +++ b/src/renee/setup.py @@ -4,12 +4,12 @@ import subprocess import sys -from renee.src.renee.util import ( +from .util import ( get_hpcname, get_version, get_tmp_dir, ) -from renee.src.renee.cache import image_cache +from .cache import image_cache def setup(sub_args, ifiles, repo_path, output_path): From d70f77a4540c93013f9c0319f03492c89940bf2a Mon Sep 17 00:00:00 2001 From: Kelly Sovacool 
def test_help():
    """`renee --help` output should mention the pipeline name."""
    result = subprocess.run(
        "./bin/renee --help", capture_output=True, shell=True, text=True
    )
    assert "RENEE" in result.stdout


def test_version():
    """`renee --version` output should report the version string."""
    result = subprocess.run(
        "./bin/renee --version", capture_output=True, shell=True, text=True
    )
    assert "renee v" in result.stdout
--help", capture_output=True, shell=True, text=True, From 017cc300b58798207cc036917e2bdb42b8fed832 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 19:09:33 -0400 Subject: [PATCH 13/32] test: help pytest find src for python path --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 953aaec..244be44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,3 +66,8 @@ renee = "." [tool.setuptools.dynamic] version = {file = "VERSION"} readme = {file = "README.md"} + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] From 5b0d50e4ce07d0d1b4e6bb3f0a95fdcd40f250ea Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 19:09:53 -0400 Subject: [PATCH 14/32] fix: datetime usage --- src/renee/dryrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/renee/dryrun.py b/src/renee/dryrun.py index 0a05d56..93d6231 100644 --- a/src/renee/dryrun.py +++ b/src/renee/dryrun.py @@ -63,7 +63,7 @@ def dryrun( def _now(): - ct = datetime.now() + ct = datetime.datetime.now() now = ct.strftime("%y%m%d%H%M%S") return now From 3d998a5c2e06dbe86d8f08063d5d47b8b97d43a6 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 19:12:58 -0400 Subject: [PATCH 15/32] feat: set default singularity sif dir resolves #53 --- CHANGELOG.md | 1 + src/renee/__main__.py | 2 ++ src/renee/cache.py | 11 +++++++++++ tests/test_cache.py | 14 ++++++++++++-- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e50d49d..9e6ce03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - Fix RSeQC environments: - Set RSeQC envmodule version to 4.0.0, which synchronizes it with the version in the docker container used by singularity. (#122, @kelly-sovacool) - Update docker with RSeQC's tools properly added to the path. (#123, @kelly-sovacool) +- Set default shared singularity SIF directory for biowulf and frce. 
def get_sif_cache_dir(hpc=None):
    """Get the default shared singularity SIF cache directory for the current HPC.

    @param hpc : HPC name ("biowulf", "frce", ...). Defaults to the current
        platform, resolved lazily at call time -- the previous
        `hpc=get_hpcname()` default argument ran an `scontrol` subprocess
        at module import time.
    @return path to the shared SIF cache, or None when the platform has none
    """
    if hpc is None:
        hpc = get_hpcname()
    sif_dir = None
    if hpc == "biowulf":
        sif_dir = "/data/CCBR_Pipeliner/SIFS"
    elif hpc == "frce":
        sif_dir = "/mnt/projects/CCBR-Pipelines/SIFs"
    return sif_dir
def test_cache_nosif():
    """Without a SIF cache, the config should keep docker:// image URIs."""
    output, config = run_in_temp(f"{renee_run}")
    assert config["images"]["arriba"] == "docker://nciccbr/ccbr_arriba_2.0.0:v0.0.1"


def test_get_sif_cache_dir():
    """The default SIF cache path is platform-specific.

    Uses direct assertions instead of eval()-ing assertion strings: eval is
    needlessly indirect, hides the failing value, and is flagged by linters.
    The pass/fail conditions are unchanged.
    """
    assert "CCBR_Pipeliner/SIFS" in get_sif_cache_dir("biowulf")
    assert "CCBR-Pipelines/SIFs" in get_sif_cache_dir("frce")
see for inspiration:https://github.com/CCBR/RENEE/blob/16d13dca1d5f0f43c7dfda379efb882a67635d17/tests/test_cache.py#L14-L28 global FILES_TO_DELETE -global RENEEDIR -global SIFCACHE -global RENEE -global RENEEVER -global HOSTNAME - -RENEEDIR = os.getenv("RENEEDIR") -SIFCACHE = os.getenv("SIFCACHE") -RENEEVER = os.getenv("RENEEVER") -HOSTNAME = os.getenv("HOSTNAME") -RENNE = renee_base(os.path.join("bin", "renee")) - FILES_TO_DELETE = list() -def launch_gui(sub_args): +def launch_gui(sub_args, debug=True): # get drop down genome+annotation options jsons = get_genomes_dict() genome_annotation_combinations = list(jsons.keys()) genome_annotation_combinations.sort() - if DEBUG: + if debug: print(jsons) - if DEBUG: + if debug: print(genome_annotation_combinations) logo = sg.Image(renee_base(os.path.join("resources", "CCBRlogo.png"))) @@ -85,16 +72,18 @@ def launch_gui(sub_args): sg.Button(button_text="Help", key="--HELP--", font=("Helvetica", 12)), ], ] - if DEBUG: + if debug: print("layout is ready!") - window = sg.Window("RENEE " + RENEEVER, layout, location=(0, 500), finalize=True) - if DEBUG: + window = sg.Window( + f"RENEE {get_version()}", layout, location=(0, 500), finalize=True + ) + if debug: print("window created!") while True: event, values = window.read() - if DEBUG: + if debug: print(event, values) # if any((event != 'Submit')): if event == "--CANCEL--" or event == sg.WIN_CLOSED: @@ -133,9 +122,9 @@ def launch_gui(sub_args): elif not os.path.exists(values["--INDIR--"]) and not os.path.exists( fixpath(values["--INDIR--"]) ): - if DEBUG: + if debug: print(values["--INDIR--"]) - if DEBUG: + if debug: print(fixpath(values["--INDIR--"])) sg.PopupError( "Input folder doesn't exist!!", @@ -146,7 +135,7 @@ def launch_gui(sub_args): continue else: inputfastqs = get_fastqs(values["--INDIR--"]) - if DEBUG: + if debug: print(inputfastqs) if len(inputfastqs) == 0: sg.PopupError( @@ -184,7 +173,7 @@ def launch_gui(sub_args): input=" ".join(inputfastqs), 
output=values["--OUTDIR--"], genome=genome, - sif_cache=SIFCACHE, + sif_cache=get_sif_cache_dir(), mode="slurm", tmp_dir=get_tmp_dir(), shared_resources=get_shared_resources_dir(), From 2ab9433464b18b0feb5b0e90bf86ba101f3cfd56 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 2 Aug 2024 19:20:56 -0400 Subject: [PATCH 17/32] docs: update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e6ce03..6dba938 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ - Support hg38 release 45 on biowulf & FRCE. (#127, @kelly-sovacool) - Show the name of the pipeline rather than the python script for CLI help messages. (#131, @kelly-sovacool) - Ensure `renee build` creates necessary `config` directory during initialization. (#139, @kelly-sovacool) +- Set default shared singularity SIF directory for biowulf and frce. (#94, @kelly-sovacool) +- Add `renee gui` subcommand to launch the graphical user interface. (#94, @kelly-sovacool) + - Previously, `renee_gui` (with an underscore) was a command in the `ccbrpipeliner` module. ## RENEE 2.5.12 @@ -13,7 +16,6 @@ - Fix RSeQC environments: - Set RSeQC envmodule version to 4.0.0, which synchronizes it with the version in the docker container used by singularity. (#122, @kelly-sovacool) - Update docker with RSeQC's tools properly added to the path. (#123, @kelly-sovacool) -- Set default shared singularity SIF directory for biowulf and frce. 
(#94, @kelly-sovacool) ## RENEE 2.5.11 From ae566142e2f94b5102c6619546e13ee1a3ffa134 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 10:08:50 -0400 Subject: [PATCH 18/32] chore: reorder imports for linter --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 1fc1704..e338358 100755 --- a/main.py +++ b/main.py @@ -2,12 +2,12 @@ import os import re import sys +from src.renee.__main__ import main # add script directory to the path to allow champagne CLI to work out-of-the-box # without the need to install it via pip first SCRIPT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src", "renee") sys.path.append(SCRIPT_DIR) -from src.renee.__main__ import main if ( __name__ == "__main__" From 9f9e37ed1e4c218acbb5511b6ed4df53d0ac465e Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 10:10:17 -0400 Subject: [PATCH 19/32] test: fix import statement --- tests/test_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cache.py b/tests/test_cache.py index f78c4e5..6854015 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -3,7 +3,7 @@ import os.path import subprocess -from src.renee.cache import get_sif_cache_dir +from renee.src.renee.cache import get_sif_cache_dir renee_run = ( "./bin/renee run " From 637c9cd03b80fcae05dc1477782146a7901da6b4 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 10:18:17 -0400 Subject: [PATCH 20/32] refactor: optionally turn genomes warning into error for GUI --- src/renee/gui.py | 2 +- src/renee/util.py | 30 ++++++++++++++---------------- tests/test_util.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index 8f17f59..7dac3cd 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -26,7 +26,7 @@ def launch_gui(sub_args, debug=True): # get drop down genome+annotation options - jsons = 
def get_genomes_list(hpcname=None, error_on_warnings=False):
    """Get list of genome annotations available for the current platform.

    @param hpcname : platform name; defaults to the current platform
        (resolved lazily so importing the module does not run `scontrol`)
    @param error_on_warnings : escalate missing-genome warnings to errors
    @return genomes_list
    """
    if hpcname is None:
        hpcname = get_hpcname()
    return sorted(
        get_genomes_dict(hpcname=hpcname, error_on_warnings=error_on_warnings).keys()
    )


def get_genomes_dict(hpcname=None, error_on_warnings=False):
    """Get dictionary of genome annotation versions and the paths to the corresponding JSON files.

    @param hpcname : platform name; defaults to the current platform
    @param error_on_warnings : when True, a missing genomes folder or missing
        JSONs raises UserWarning instead of merely warning
    @return genomes_dict { genome_name: json_file_path }
    """
    if hpcname is None:
        hpcname = get_hpcname()
    # Scope the warning filter to this call. The previous
    # warnings.filterwarnings("error") / warnings.resetwarnings() pair
    # mutated process-global state: the reset was skipped entirely when a
    # warning escalated into an exception (leaking the "error" filter), and
    # resetwarnings() also wiped any filters configured by the caller.
    with warnings.catch_warnings():
        if error_on_warnings:
            warnings.simplefilter("error")
        genomes_dir = renee_base(os.path.join("config", "genomes", hpcname))
        if not os.path.exists(genomes_dir):
            warnings.warn(f"Folder does not exist: {genomes_dir}")
        json_files = glob.glob(genomes_dir + "/*.json")
        if not json_files:
            warnings.warn(
                f"No Genome+Annotation JSONs found in {genomes_dir}. Please specify a custom genome json file with `--genome`"
            )
        return {
            os.path.basename(json_file).replace(".json", ""): json_file
            for json_file in json_files
        }
src/renee/__main__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/renee/__main__.py b/src/renee/__main__.py index 01ccba7..8b7b8d9 100755 --- a/src/renee/__main__.py +++ b/src/renee/__main__.py @@ -2,7 +2,7 @@ # -*- coding: UTF-8 -*- """RENEE: Rna sEquencing aNalysis pipElinE: -An highly reproducible and portable RNA-seq data analysises pipeline +An highly reproducible and portable RNA-seq data analysis pipeline About: This is the main entry for the RENEE pipeline. USAGE: @@ -35,6 +35,8 @@ get_genomes_list, get_version, check_python_version, + _cp_r_safe_, + orchestrate, ) # Pipeline Metadata and globals @@ -171,7 +173,9 @@ def unlock(sub_args): cwd=outdir, stderr=subprocess.STDOUT, ) - except subprocess.CalledProcessError as e: + except ( + subprocess.CalledProcessError + ) as e: # TODO: why capture this exception at all? # Unlocking process returned a non-zero exit code sys.exit("{}\n{}".format(e, e.output)) From 1f77f9b126087fd6814286c768bdfa9382308ae3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 11:11:25 -0400 Subject: [PATCH 22/32] ci: remove pytest init pythonpath in attempt to get pytest to find renee.src.renee on GHA --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 244be44..953aaec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,8 +66,3 @@ renee = "." 
[tool.setuptools.dynamic] version = {file = "VERSION"} readme = {file = "README.md"} - -[tool.pytest.ini_options] -pythonpath = [ - "src" -] From f6f751b2ddd69877a13971bfdf493795302d7e99 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 15:36:20 -0400 Subject: [PATCH 23/32] chore: remove reference to champagne --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index e338358..716c3d8 100755 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import sys from src.renee.__main__ import main -# add script directory to the path to allow champagne CLI to work out-of-the-box +# add script directory to the path to allow the CLI to work out-of-the-box # without the need to install it via pip first SCRIPT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src", "renee") sys.path.append(SCRIPT_DIR) From 82478010e7931a33c14e7e352ce9292bce70fe57 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 15:57:59 -0400 Subject: [PATCH 24/32] fix(cli): make sure prog name is renee --- src/renee/__main__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/renee/__main__.py b/src/renee/__main__.py index 8b7b8d9..d29c8fd 100755 --- a/src/renee/__main__.py +++ b/src/renee/__main__.py @@ -539,9 +539,7 @@ def parsed_arguments(name, description): description = "{0}{1}{2}".format(c.bold, description, c.end) # Create a top-level parser - parser = argparse.ArgumentParser( - description="{}: {}".format(styled_name, description) - ) + parser = argparse.ArgumentParser(prog="renee", description=description) # Adding Version information parser.add_argument( From 33b59f69ca7d2644d261cc770b802f2677905df6 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 17:56:47 -0400 Subject: [PATCH 25/32] fix: tmpdir needs default option + outdir --- src/renee/gui.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index 7dac3cd..10e2f78 
100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -168,14 +168,15 @@ def launch_gui(sub_args, debug=True): continue # sg.Popup("Output folder exists... this is probably a re-run ... is it?",location=(0,500)) genome = jsons[values["--ANNOTATION--"]] + output_dir = values["--OUTDIR--"] # create sub args for renee run run_args = argparse.Namespace( input=" ".join(inputfastqs), - output=values["--OUTDIR--"], + output=output_dir, genome=genome, sif_cache=get_sif_cache_dir(), mode="slurm", - tmp_dir=get_tmp_dir(), + tmp_dir=get_tmp_dir("", output_dir), shared_resources=get_shared_resources_dir(), dry_run=True, ) From 9e1266012149f8983d89263753e46f23def32372 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 18:03:44 -0400 Subject: [PATCH 26/32] fix(gui): fix util function usage --- src/renee/gui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index 10e2f78..2349fda 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -176,8 +176,9 @@ def launch_gui(sub_args, debug=True): genome=genome, sif_cache=get_sif_cache_dir(), mode="slurm", + runmode="run", tmp_dir=get_tmp_dir("", output_dir), - shared_resources=get_shared_resources_dir(), + shared_resources=get_shared_resources_dir("", output_dir), dry_run=True, ) # execute dry run and capture stdout/stderr From 68348afa1820f7b5a945f14e498cf9283884131a Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 18:36:10 -0400 Subject: [PATCH 27/32] fix(gui): need all subargs in NameSpace, including defaults --- src/renee/gui.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index 2349fda..3347b9c 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -171,15 +171,21 @@ def launch_gui(sub_args, debug=True): output_dir = values["--OUTDIR--"] # create sub args for renee run run_args = argparse.Namespace( - input=" ".join(inputfastqs), + input=inputfastqs, output=output_dir, 
genome=genome, - sif_cache=get_sif_cache_dir(), mode="slurm", runmode="run", + dry_run=True, + sif_cache=get_sif_cache_dir(), + singularity_cache=os.environ["SINGULARITY_CACHEDIR"], tmp_dir=get_tmp_dir("", output_dir), shared_resources=get_shared_resources_dir("", output_dir), - dry_run=True, + star_2_pass_basic=False, + small_rna=False, + create_nidap_folder=False, + wait=False, + threads=2, ) # execute dry run and capture stdout/stderr allout = run_in_context(run_args) From c9e99843855efe075a68f8f1c62b71e6cd308beb Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 18:54:15 -0400 Subject: [PATCH 28/32] refactor: move run_in_context() to run submodule also: remove unnecessary sys.exit(0) from dryrun. causes incorrect test failure with pytest --- src/renee/gui.py | 13 +------ src/renee/run.py | 92 +++++++++++++++++++++++++++-------------------- tests/test_run.py | 43 ++++++++++++++++++++++ 3 files changed, 97 insertions(+), 51 deletions(-) create mode 100644 tests/test_run.py diff --git a/src/renee/gui.py b/src/renee/gui.py index 3347b9c..80b99ed 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import contextlib import glob import io import os @@ -16,7 +15,7 @@ get_version, ) from .cache import get_sif_cache_dir -from .run import run +from .run import run_in_context # TODO: get rid of all the global variables # TODO: let's use a tmp dir and put these files there instead. 
see for inspiration:https://github.com/CCBR/RENEE/blob/16d13dca1d5f0f43c7dfda379efb882a67635d17/tests/test_cache.py#L14-L28 @@ -241,16 +240,6 @@ def launch_gui(sub_args, debug=True): delete_files(FILES_TO_DELETE) -def run_in_context(args): - """Execute the run function in a context manager to capture stdout/stderr""" - with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( - io.StringIO() - ) as err_f: - run(args) - allout = out_f.getvalue() + "\n" + err_f.getvalue() - return allout - - def copy_to_clipboard(string): r = Tk() r.withdraw() diff --git a/src/renee/run.py b/src/renee/run.py index 3dd35c1..bc7cd0e 100644 --- a/src/renee/run.py +++ b/src/renee/run.py @@ -1,3 +1,5 @@ +import contextlib +import io import json import os import pathlib @@ -80,49 +82,51 @@ def run(sub_args): if sub_args.dry_run: # print singularity bind baths and exit print("\nSingularity Bind Paths:{}".format(all_bind_paths)) - sys.exit(0) - - # Run pipeline - masterjob = orchestrate( - mode=sub_args.mode, - outdir=sub_args.output, - additional_bind_paths=all_bind_paths, - alt_cache=sub_args.singularity_cache, - threads=sub_args.threads, - tmp_dir=get_tmp_dir(sub_args.tmp_dir, sub_args.output), - wait=wait, - hpcname=hpcname, - ) + # end at dry run + else: # continue with real run + # Run pipeline + masterjob = orchestrate( + mode=sub_args.mode, + outdir=sub_args.output, + additional_bind_paths=all_bind_paths, + alt_cache=sub_args.singularity_cache, + threads=sub_args.threads, + tmp_dir=get_tmp_dir(sub_args.tmp_dir, sub_args.output), + wait=wait, + hpcname=hpcname, + ) - # Wait for subprocess to complete, - # this is blocking - masterjob.wait() - - # Relay information about submission - # of the master job or the exit code of the - # pipeline that ran in local mode - if sub_args.mode == "local": - if int(masterjob.returncode) == 0: - print("{} pipeline has successfully completed".format("RENEE")) - else: - fatal( - "{} pipeline failed. 
Please see standard output for more information.".format( - "RENEE" + # Wait for subprocess to complete, + # this is blocking + masterjob.wait() + + # Relay information about submission + # of the master job or the exit code of the + # pipeline that ran in local mode + if sub_args.mode == "local": + if int(masterjob.returncode) == 0: + print("{} pipeline has successfully completed".format("RENEE")) + else: + fatal( + "{} pipeline failed. Please see standard output for more information.".format( + "RENEE" + ) ) + elif sub_args.mode == "slurm": + jobid = ( + open(os.path.join(sub_args.output, "logfiles", "mjobid.log")) + .read() + .strip() ) - elif sub_args.mode == "slurm": - jobid = ( - open(os.path.join(sub_args.output, "logfiles", "mjobid.log")).read().strip() - ) - if int(masterjob.returncode) == 0: - print("Successfully submitted master job: ", end="") - else: - fatal( - "Error occurred when submitting the master job. Error code = {}".format( - masterjob.returncode + if int(masterjob.returncode) == 0: + print("Successfully submitted master job: ", end="") + else: + fatal( + "Error occurred when submitting the master job. 
Error code = {}".format( + masterjob.returncode + ) ) - ) - print(jobid) + print(jobid) def resolve_additional_bind_paths(search_paths): @@ -200,3 +204,13 @@ def get_fastq_screen_paths(fastq_screen_confs, match="DATABASE", file_index=-1): db_path = line.strip().split()[file_index] databases.append(db_path) return databases + + +def run_in_context(args): + """Execute the run function in a context manager to capture stdout/stderr""" + with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( + io.StringIO() + ) as err_f: + run(args) + allout = out_f.getvalue() + "\n" + err_f.getvalue() + return allout diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..11a0ffc --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,43 @@ +import argparse +import glob +import os +import tempfile + +from renee.src.renee.util import ( + get_tmp_dir, + get_shared_resources_dir, + renee_base, +) +from renee.src.renee.cache import get_sif_cache_dir +from renee.src.renee.run import run_in_context +from renee.src.renee.util import get_hpcname + + +def test_dryrun(): + if get_hpcname() == "biowulf": + with tempfile.TemporaryDirectory() as tmp_dir: + run_args = argparse.Namespace( + input=list(glob.glob(os.path.join(renee_base(".tests"), "*.fastq.gz"))), + output=tmp_dir, + genome=os.path.join( + renee_base("config"), "genomes", "biowulf", "hg38_36.json" + ), + mode="slurm", + runmode="run", + dry_run=True, + sif_cache=get_sif_cache_dir(), + singularity_cache=os.environ["SINGULARITY_CACHEDIR"], + tmp_dir=tmp_dir, + shared_resources=None, + star_2_pass_basic=False, + small_rna=False, + create_nidap_folder=False, + wait=False, + threads=2, + ) + # execute dry run and capture stdout/stderr + allout = run_in_context(run_args) + assert ( + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." 
+ in allout + ) From 36c52e0e933c3dfd666972bcaa2e46dc8ba349cf Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 5 Aug 2024 18:55:19 -0400 Subject: [PATCH 29/32] refactor: default tmp dir and shared resources dir are None --- src/renee/gui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index 80b99ed..40bd4f3 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -178,8 +178,8 @@ def launch_gui(sub_args, debug=True): dry_run=True, sif_cache=get_sif_cache_dir(), singularity_cache=os.environ["SINGULARITY_CACHEDIR"], - tmp_dir=get_tmp_dir("", output_dir), - shared_resources=get_shared_resources_dir("", output_dir), + tmp_dir=get_tmp_dir(None, output_dir), + shared_resources=get_shared_resources_dir(None, output_dir), star_2_pass_basic=False, small_rna=False, create_nidap_folder=False, From adf6be904a785deb66b5705f92a0c53ccc9467c2 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 6 Aug 2024 10:06:42 -0400 Subject: [PATCH 30/32] refactor: write get_singularity_cache_dir() same behavior as before, just moved this from orchestrate() to its own function for the GUI to use too --- src/renee/__main__.py | 2 +- src/renee/cache.py | 12 ++++++++++-- src/renee/gui.py | 8 ++++++-- src/renee/util.py | 9 +++------ tests/test_cache.py | 11 ++++++++++- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/renee/__main__.py b/src/renee/__main__.py index d29c8fd..2a46b9c 100755 --- a/src/renee/__main__.py +++ b/src/renee/__main__.py @@ -910,7 +910,7 @@ def parsed_arguments(name, description): type=lambda option: os.path.abspath(os.path.expanduser(option)), required=False, help=argparse.SUPPRESS, - default=get_sif_cache_dir(), + default=get_sif_cache_dir(hpc=get_hpcname()), ) # Create NIDAP output folder diff --git a/src/renee/cache.py b/src/renee/cache.py index 21f9961..a908634 100644 --- a/src/renee/cache.py +++ b/src/renee/cache.py @@ -2,10 +2,18 @@ import os import sys -from .util import 
get_hpcname + +def get_singularity_cachedir(output_dir, cache_dir=None): + """Returns the singularity cache directory. + If no user-provided cache directory is provided, + the default singularity cache is in the output directory. + """ + if not cache_dir: + cache_dir = os.path.join(output_dir, ".singularity") + return cache_dir -def get_sif_cache_dir(hpc=get_hpcname()): +def get_sif_cache_dir(hpc=None): sif_dir = None if hpc == "biowulf": sif_dir = "/data/CCBR_Pipeliner/SIFS" diff --git a/src/renee/gui.py b/src/renee/gui.py index 40bd4f3..d122dc0 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -13,6 +13,8 @@ get_shared_resources_dir, renee_base, get_version, + get_singularity_cachedir, + get_hpcname, ) from .cache import get_sif_cache_dir from .run import run_in_context @@ -176,8 +178,10 @@ def launch_gui(sub_args, debug=True): mode="slurm", runmode="run", dry_run=True, - sif_cache=get_sif_cache_dir(), - singularity_cache=os.environ["SINGULARITY_CACHEDIR"], + sif_cache=get_sif_cache_dir(hpc=get_hpcname()), + singularity_cache=get_singularity_cachedir( + output_dir, os.environ.get("SINGULARITY_CACHEDIR", None) + ), tmp_dir=get_tmp_dir(None, output_dir), shared_resources=get_shared_resources_dir(None, output_dir), star_2_pass_basic=False, diff --git a/src/renee/util.py b/src/renee/util.py index b4be98a..fb50948 100644 --- a/src/renee/util.py +++ b/src/renee/util.py @@ -5,6 +5,7 @@ import shutil import sys import warnings +from .cache import get_singularity_cachedir def renee_base(rel_path=""): @@ -224,13 +225,9 @@ def orchestrate( # to output directory my_env = {} my_env.update(os.environ) - cache = os.path.join(outdir, ".singularity") - my_env["SINGULARITY_CACHEDIR"] = cache - if alt_cache: - # Override the pipeline's default cache location - my_env["SINGULARITY_CACHEDIR"] = alt_cache - cache = alt_cache + cache = get_singularity_cachedir(output_dir=outdir, cache_dir=alt_cache) + my_env["SINGULARITY_CACHEDIR"] = cache if additional_bind_paths: # Add Bind 
PATHs for outdir and tmp dir diff --git a/tests/test_cache.py b/tests/test_cache.py index 6854015..846929f 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -3,7 +3,7 @@ import os.path import subprocess -from renee.src.renee.cache import get_sif_cache_dir +from renee.src.renee.cache import get_sif_cache_dir, get_singularity_cachedir renee_run = ( "./bin/renee run " @@ -56,3 +56,12 @@ def test_get_sif_cache_dir(): ] errors = [assertion for assertion in assertions if not eval(assertion)] assert not errors, "errors occurred:\n{}".format("\n".join(errors)) + + +def test_get_singularity_cachedir(): + assertions = [ + "get_singularity_cachedir('outdir') == 'outdir/.singularity'", + "get_singularity_cachedir('outdir', 'cache') == 'cache'", + ] + errors = [assertion for assertion in assertions if not eval(assertion)] + assert not errors, "errors occurred:\n{}".format("\n".join(errors)) From db74a4f6e5c96cc9cf64be1a3a669109cc9056ec Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 6 Aug 2024 11:52:05 -0400 Subject: [PATCH 31/32] fix: do not change fqscreen path with shared resources they are no longer in shared resources dir --- src/renee/gui.py | 2 +- src/renee/setup.py | 5 ----- src/renee/util.py | 2 +- tests/test_run.py | 4 ++-- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/renee/gui.py b/src/renee/gui.py index d122dc0..12aad81 100755 --- a/src/renee/gui.py +++ b/src/renee/gui.py @@ -183,7 +183,7 @@ def launch_gui(sub_args, debug=True): output_dir, os.environ.get("SINGULARITY_CACHEDIR", None) ), tmp_dir=get_tmp_dir(None, output_dir), - shared_resources=get_shared_resources_dir(None, output_dir), + shared_resources=get_shared_resources_dir(None), star_2_pass_basic=False, small_rna=False, create_nidap_folder=False, diff --git a/src/renee/setup.py b/src/renee/setup.py index 9130ae8..ed2bb51 100644 --- a/src/renee/setup.py +++ b/src/renee/setup.py @@ -110,11 +110,6 @@ def setup(sub_args, ifiles, repo_path, output_path): ] = 
os.path.join( sub_args.shared_resources, "fastq_screen_db", "fastq_screen.conf" ) - config["bin"]["rnaseq"]["tool_parameters"][ - "FASTQ_SCREEN_CONFIG2" - ] = os.path.join( - sub_args.shared_resources, "fastq_screen_db", "fastq_screen_2.conf" - ) config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"] = os.path.join( sub_args.shared_resources, "20180907_standard_kraken2" ) diff --git a/src/renee/util.py b/src/renee/util.py index fb50948..0ffad74 100644 --- a/src/renee/util.py +++ b/src/renee/util.py @@ -66,7 +66,7 @@ def get_tmp_dir(tmp_dir, outdir, hpc=get_hpcname()): return tmp_dir -def get_shared_resources_dir(shared_dir, outdir, hpc=get_hpcname()): +def get_shared_resources_dir(shared_dir, hpc=get_hpcname()): """Get default shared resources directory for biowulf and frce. Allow user override.""" if not shared_dir: if hpc == "biowulf": diff --git a/tests/test_run.py b/tests/test_run.py index 11a0ffc..229d40a 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -9,7 +9,7 @@ renee_base, ) from renee.src.renee.cache import get_sif_cache_dir -from renee.src.renee.run import run_in_context +from renee.src.renee.run import run, run_in_context from renee.src.renee.util import get_hpcname @@ -28,7 +28,7 @@ def test_dryrun(): sif_cache=get_sif_cache_dir(), singularity_cache=os.environ["SINGULARITY_CACHEDIR"], tmp_dir=tmp_dir, - shared_resources=None, + shared_resources=get_shared_resources_dir(None), star_2_pass_basic=False, small_rna=False, create_nidap_folder=False, From 65f07b6ff923084b6f45b5bfa070d77fa9296e55 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 6 Aug 2024 12:40:57 -0400 Subject: [PATCH 32/32] fix: do not change EITHER fqscreen path with shared resources previous commit only fixed it for one, but there are two --- src/renee/setup.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/renee/setup.py b/src/renee/setup.py index ed2bb51..000e3c8 100644 --- a/src/renee/setup.py +++ b/src/renee/setup.py @@ -105,11 +105,6 @@ def 
setup(sub_args, ifiles, repo_path, output_path): if sub_args.shared_resources: # Update paths to shared resources directory - config["bin"]["rnaseq"]["tool_parameters"][ - "FASTQ_SCREEN_CONFIG" - ] = os.path.join( - sub_args.shared_resources, "fastq_screen_db", "fastq_screen.conf" - ) config["bin"]["rnaseq"]["tool_parameters"]["KRAKENBACDB"] = os.path.join( sub_args.shared_resources, "20180907_standard_kraken2" )