From fb38a02f2378b3e09264952342c255fc18bf9a99 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Wed, 7 Feb 2024 11:50:30 -0500 Subject: [PATCH] Use tmpdir/lscratch with fastqc due to gpfs issue --- ervx | 6 ++-- resources/cluster.json | 4 ++- workflow/rules/paired-end.smk | 54 +++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/ervx b/ervx index 9917b9f..1594803 100755 --- a/ervx +++ b/ervx @@ -1,7 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- -"""RNA-seek: an highly reproducible and portable RNA-seq pipeline +""" +This pipeline was bootstrapped from: https://github.com/skchronicles/RNA-seek +RNA-seek: an highly reproducible and portable RNA-seq pipeline About: This is the main entry for the RNA-seek pipeline. USAGE: @@ -21,7 +23,7 @@ import argparse # potential python3 3rd party package, added in python/3.5 # Pipeline Metadata and globals __author__ = 'Skyler Kuhn' -__version__ = 'v1.9.0' +__version__ = 'v0.1.0' __email__ = 'kuhnsa@nih.gov' __home__ = os.path.dirname(os.path.abspath(__file__)) _name = os.path.basename(sys.argv[0]) diff --git a/resources/cluster.json b/resources/cluster.json index deccf36..da51f93 100644 --- a/resources/cluster.json +++ b/resources/cluster.json @@ -21,6 +21,7 @@ "fastqc": { "mem": "24g", "threads": "32", + "gres": "lscratch:64", "time": "1-00:00:00" }, "fastq_screen": { @@ -56,7 +57,8 @@ "time": "1-00:00:00" }, "rawfastqc": { - "threads": "8" + "threads": "8", + "gres": "lscratch:64" }, "rsem": { "gres": "lscratch:500", diff --git a/workflow/rules/paired-end.smk b/workflow/rules/paired-end.smk index 50eab5e..69f28fc 100644 --- a/workflow/rules/paired-end.smk +++ b/workflow/rules/paired-end.smk @@ -50,15 +50,40 @@ rule rawfastqc: output: join(workpath,"rawQC","{name}.R1_fastqc.zip"), join(workpath,"rawQC","{name}.R2_fastqc.zip"), + tmpdir=tmpdir, priority: 2 params: rname='pl_rawfastqc', outdir=join(workpath,"rawQC"), + tmpdir=tmpdir, threads: int(allocated("threads", "rawfastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input.R1} {input.R2} -t {threads} -o {params.outdir}; + # Setups temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff + fastqc \\ + {input.R1} \\ + {input.R2} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """ @@ -119,12 +144,35 @@ rule fastqc: params: rname='pl_fastqc', outdir=join(workpath,"QC"), - getrl=join("workflow", "scripts", "get_read_length.py"), + tmpdir=tmpdir, threads: int(allocated("threads", "fastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input.R1} {input.R2} -t {threads} -o {params.outdir}; + # Setups temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff + fastqc \\ + {input.R1} \\ + {input.R2} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """