Skip to content

Commit

Permalink
Merge pull request #151 from CCBR/use-tools-pkg
Browse files Browse the repository at this point in the history
Use tools package
  • Loading branch information
samarth8392 committed Aug 26, 2024
2 parents 181a4bf + b67417e commit 5b9daeb
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 526 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ classifiers = [
requires-python = ">=3.11"
dependencies = [
"argparse",
"ccbr_tools@git+https://github.com/CCBR/Tools",
"Click >= 8.1.3",
"PySimpleGui < 5",
"snakemake >= 7.32, < 8",
Expand Down
47 changes: 30 additions & 17 deletions src/renee/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
"""

# Python standard library
from __future__ import print_function
from shutil import copy
import json
import os
Expand All @@ -22,22 +21,22 @@

# 3rd party imports from pypi
import argparse

# local imports
from .cache import get_sif_cache_dir
from .run import run
from .dryrun import dryrun
from .gui import launch_gui
from .conditions import fatal
from .util import (
from ccbr_tools.pipeline.util import (
get_hpcname,
get_tmp_dir,
get_genomes_list,
get_version,
check_python_version,
_cp_r_safe_,
orchestrate,
)
from ccbr_tools.pipeline.cache import get_sif_cache_dir

# local imports
from .run import run
from .dryrun import dryrun
from .gui import launch_gui
from .conditions import fatal
from .util import renee_base, get_version
from .orchestrate import orchestrate

# Pipeline Metadata and globals
RENEE_PATH = os.path.dirname(
Expand Down Expand Up @@ -398,9 +397,11 @@ def build(sub_args):
)
)
elif sub_args.mode == "slurm":
jobid = (
open(os.path.join(sub_args.output, "logfiles", "bjobid.log")).read().strip()
)
with open(
os.path.join(sub_args.output, "logfiles", "bjobid.log"), "r"
) as infile:
jobid = infile.read().strip()

if int(masterjob.returncode) == 0:
print("Successfully submitted master job: ", end="")
else:
Expand Down Expand Up @@ -770,7 +771,12 @@ def parsed_arguments(name, description):
{2}{3}Prebuilt genome+annotation combos:{4}
{5}
""".format(
"renee", __version__, c.bold, c.url, c.end, list(get_genomes_list())
"renee",
__version__,
c.bold,
c.url,
c.end,
list(get_genomes_list(repo_base=renee_base)),
)
)

Expand Down Expand Up @@ -817,7 +823,9 @@ def parsed_arguments(name, description):
"--genome",
required=True,
type=lambda option: str(
genome_options(subparser_run, option, get_genomes_list())
genome_options(
subparser_run, option, get_genomes_list(repo_base=renee_base)
)
),
help=argparse.SUPPRESS,
)
Expand Down Expand Up @@ -1126,7 +1134,12 @@ def parsed_arguments(name, description):
{2}{3}Prebuilt genome+annotation combos:{4}
{5}
""".format(
"renee", __version__, c.bold, c.url, c.end, list(get_genomes_list())
"renee",
__version__,
c.bold,
c.url,
c.end,
list(get_genomes_list(repo_base=renee_base)),
)
)

Expand Down
63 changes: 0 additions & 63 deletions src/renee/cache.py

This file was deleted.

19 changes: 9 additions & 10 deletions src/renee/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@
import sys
from tkinter import Tk

from .util import (
from ccbr_tools.pipeline.util import (
get_genomes_dict,
get_tmp_dir,
get_shared_resources_dir,
renee_base,
get_version,
get_singularity_cachedir,
get_hpcname,
)
from .cache import get_sif_cache_dir
from .run import run_in_context
from ccbr_tools.pipeline.cache import get_sif_cache_dir, get_singularity_cachedir
from ccbr_tools.shell import exec_in_context

from .util import get_version, renee_base, get_shared_resources_dir
from .run import run

# TODO: get rid of all the global variables
# TODO: let's use a tmp dir and put these files there instead. For inspiration, see: https://github.com/CCBR/RENEE/blob/16d13dca1d5f0f43c7dfda379efb882a67635d17/tests/test_cache.py#L14-L28
Expand All @@ -27,7 +26,7 @@

def launch_gui(sub_args, debug=True):
# get drop down genome+annotation options
jsons = get_genomes_dict(error_on_warnings=True)
jsons = get_genomes_dict(repo_base=renee_base, error_on_warnings=True)
genome_annotation_combinations = list(jsons.keys())
genome_annotation_combinations.sort()
if debug:
Expand Down Expand Up @@ -191,7 +190,7 @@ def launch_gui(sub_args, debug=True):
threads=2,
)
# execute dry run and capture stdout/stderr
allout = run_in_context(run_args)
allout = exec_in_context(run, run_args)
sg.popup_scrolled(
allout,
title="Dryrun:STDOUT/STDERR",
Expand All @@ -211,7 +210,7 @@ def launch_gui(sub_args, debug=True):
if ch == "Yes":
run_args.dry_run = False
# execute live run
allout = run_in_context(run_args)
allout = exec_in_context(run, run_args)
sg.popup_scrolled(
allout,
title="Dryrun:STDOUT/STDERR",
Expand Down
95 changes: 1 addition & 94 deletions src/renee/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
import re
import sys

from .util import (
_cp_r_safe_,
)
from ccbr_tools.pipeline.util import _cp_r_safe_, _sym_safe_


def initialize(sub_args, repo_path, output_path):
Expand Down Expand Up @@ -51,94 +49,3 @@ def initialize(sub_args, repo_path, output_path):
inputs = _sym_safe_(input_data=sub_args.input, target=output_path)

return inputs


def _sym_safe_(input_data, target):
    """Create renamed symlinks in ``target`` for each provided FastQ file.

    Each input's basename is normalized with ``rename()`` and a symlink with
    that name is created inside ``target``. A link (or file) that already
    exists at the destination is left untouched. A dangling symlink left over
    from a previous run is replaced so that it points at the current input.
    Relative or symlinked source paths are resolved to an absolute real path
    before linking.

    @param input_data <list[<str>]>:
        List of input files to symlink to target location
    @param target <str>:
        Target path to copy templates and required resources
    @return input_fastqs list[<str>]:
        List of renamed input FastQs (paths inside ``target``)
    """
    input_fastqs = []  # destination paths, in the same order as input_data
    for source in input_data:
        renamed = os.path.join(target, rename(os.path.basename(source)))
        input_fastqs.append(renamed)

        # os.path.exists() follows symlinks, so a broken link looks like a
        # missing path and os.symlink() would then raise FileExistsError.
        # Drop the stale link first so it can be re-created below.
        if os.path.islink(renamed) and not os.path.exists(renamed):
            os.remove(renamed)

        if not os.path.lexists(renamed):
            # Follow source symlinks to resolve any binding issues
            os.symlink(os.path.abspath(os.path.realpath(source)), renamed)

    return input_fastqs


def rename(filename):
    """Rename a FastQ file so it ends in ``.R1.fastq.gz`` or ``.R2.fastq.gz``.

    The read number (and, for Illumina-style names, the three-character lane
    token) is inferred from the end of the filename. A name that already has
    the expected extension is returned unchanged. Only the first matching
    pattern is applied.

    @param filename <str>:
        Original name of file to be renamed
    @return filename <str>:
        A renamed FastQ filename
    @raises NameError:
        If the extension of the file cannot be inferred
    """
    if filename.endswith((".R1.fastq.gz", ".R2.fastq.gz")):
        # Filename is already in the correct format
        return filename

    # Covers common extensions from SF, SRA, EBI, TCGA, and external sequencing providers
    # key = regex to match string and value = how it will be renamed
    # NOTE: the dots are deliberately left unescaped so that either "_" or "."
    # is accepted as a separator.
    extensions = {
        # Matches: _R[12]_fastq.gz, _R[12].fastq.gz, _R[12]_fq.gz, etc.
        r".R1.f(ast)?q.gz$": ".R1.fastq.gz",
        r".R2.f(ast)?q.gz$": ".R2.fastq.gz",
        # Matches: _R[12]_001_fastq_gz, _R[12].001.fastq.gz, _R[12]_001.fq.gz, etc.
        # Capture lane information as named group
        r".R1.(?P<lane>...).f(ast)?q.gz$": ".R1.fastq.gz",
        r".R2.(?P<lane>...).f(ast)?q.gz$": ".R2.fastq.gz",
        # Matches: _[12].fastq.gz, _[12].fq.gz, _[12]_fastq_gz, etc.
        r"_1.f(ast)?q.gz$": ".R1.fastq.gz",
        r"_2.f(ast)?q.gz$": ".R2.fastq.gz",
    }

    for regex, new_ext in extensions.items():
        matched = re.search(regex, filename)
        if matched is None:
            continue

        # Retain the lane token (when the pattern defines one) in the new
        # extension. groupdict() avoids using IndexError as control flow.
        lane = matched.groupdict().get("lane")
        if lane is not None:
            new_ext = "_{}{}".format(lane, new_ext)

        # Splice the new extension in directly instead of calling re.sub():
        # the lane text comes from the filename and must not be interpreted
        # as a replacement template (backslashes would raise or be expanded).
        return filename[: matched.start()] + new_ext + filename[matched.end() :]

    message = (
        "\n\tFatal: Failed to rename provided input '{}'!\n"
        "Cannot determine the extension of the user provided input file.\n"
        "Please rename the file list above before trying again.\n"
        "Here is example of acceptable input file extensions:\n"
        "sampleName.R1.fastq.gz sampleName.R2.fastq.gz\n"
        "sampleName_R1_001.fastq.gz sampleName_R2_001.fastq.gz\n"
        "sampleName_1.fastq.gz sampleName_2.fastq.gz\n"
        "Please also check that your input files are gzipped?\n"
        "If they are not, please gzip them before proceeding again.\n"
    )
    raise NameError(message.format(filename))
Loading

0 comments on commit 5b9daeb

Please sign in to comment.