diff --git a/ega-download-wf/main.nf b/ega-download-wf/main.nf index 777b76e..0a8a8bc 100755 --- a/ega-download-wf/main.nf +++ b/ega-download-wf/main.nf @@ -49,7 +49,7 @@ params.pyega3_ega_user="" params.pyega3_ega_pass="" include { downloadPyega3 } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-pyega3@0.1.3/main.nf' params([*:params, 'cleanup': false]) -include { downloadAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.1/main.nf' params([*:params, 'cleanup': false]) +include { downloadAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf' params([*:params, 'cleanup': false]) include { decryptAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/decrypt-aspera@0.1.1/main.nf' params([*:params, 'cleanup': false]) // please update workflow code as needed diff --git a/ega-download-wf/pkg.json b/ega-download-wf/pkg.json index 227085c..fd9d63a 100644 --- a/ega-download-wf/pkg.json +++ b/ega-download-wf/pkg.json @@ -12,7 +12,7 @@ "url": "https://github.com/icgc-argo/argo-data-submission.git" }, "dependencies": [ - "github.com/icgc-argo/argo-data-submission/download-aspera@0.1.1", + "github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2", "github.com/icgc-argo/argo-data-submission/download-pyega3@0.1.3", "github.com/icgc-argo/argo-data-submission/decrypt-aspera@0.1.1" ], diff --git a/ega-download-wf/tests/input/aspera.tsv b/ega-download-wf/tests/input/aspera.tsv new file mode 100644 index 0000000..961f35a --- /dev/null +++ b/ega-download-wf/tests/input/aspera.tsv @@ -0,0 +1 @@ +ega_file_id path \ No newline at end of file diff --git a/ega-download-wf/tests/input/pyega3.tsv b/ega-download-wf/tests/input/pyega3.tsv index 861925a..e4c889e 100644 --- a/ega-download-wf/tests/input/pyega3.tsv +++ b/ega-download-wf/tests/input/pyega3.tsv @@ -1,2 +1,2 @@ ega_file_id path -EGAF00001770106 EGAF00001770106.bam +EGAF00001770106 diff --git 
a/ega-download-wf/tests/input/test_rg_3.bam b/ega-download-wf/tests/input/test_rg_3.bam deleted file mode 100644 index ab8a214..0000000 Binary files a/ega-download-wf/tests/input/test_rg_3.bam and /dev/null differ diff --git a/ega-download-wf/tests/local-test-job-aspera.json b/ega-download-wf/tests/local-test-job-aspera.json index 47606e9..6e1dc92 100644 --- a/ega-download-wf/tests/local-test-job-aspera.json +++ b/ega-download-wf/tests/local-test-job-aspera.json @@ -1,9 +1,4 @@ { - "file_info_tsv" : "tests/input/pyega3.tsv", - "pyega3_ega_user" : "ega-test-data@ebi.ac.uk", - "pyega3_ega_pass" : "egarocks", - "download_mode" : "aspera", - "ascp_scp_host" : "", - "ascp_scp_user" : "", - "aspera_scp_pass" : "" + "file_info_tsv" : "input/aspera.tsv", + "download_mode" : "aspera" } \ No newline at end of file diff --git a/ega-download-wf/tests/local-test-job-pyega.json b/ega-download-wf/tests/local-test-job-pyega.json index 165e283..a66404e 100644 --- a/ega-download-wf/tests/local-test-job-pyega.json +++ b/ega-download-wf/tests/local-test-job-pyega.json @@ -1,9 +1,6 @@ { - "file_info_tsv" : "tests/input/pyega3.tsv", + "file_info_tsv" : "input/pyega3.tsv", "pyega3_ega_user" : "ega-test-data@ebi.ac.uk", "pyega3_ega_pass" : "egarocks", - "download_mode" : "pyega3", - "ascp_scp_host" : "", - "ascp_scp_user" : "", - "aspera_scp_pass" : "" + "download_mode" : "pyega3" } \ No newline at end of file diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/.dockerignore b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/.dockerignore new file mode 100644 index 0000000..71266ec --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/.dockerignore @@ -0,0 +1,5 @@ +.gitignore +.nextflow* +tests +work +outdir diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/Dockerfile 
b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/Dockerfile new file mode 100644 index 0000000..6e694f4 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/Dockerfile @@ -0,0 +1,34 @@ +FROM ubuntu:18.04 + +LABEL org.opencontainers.image.source https://github.com/icgc-argo/argo-data-submission +LABEL org.opencontainers.image.authors Edmund Su (edmund.su@oicr.on.ca) +LABEL org.opencontainers.image.title ICGC ARGO Data download image + +RUN apt-get update && \ + apt-get install -y python3-pip python3-dev software-properties-common curl && \ + apt-get install -y libz-dev pkg-config libtool m4 autotools-dev automake libncurses5-dev libbz2-dev liblzma-dev + +RUN mkdir /tools + +# Install Ascp + +ENV PATH="/tools:${PATH}" +ENV PATH="~/.aspera/connect/bin/:${PATH}" +COPY *.py /tools/ + +RUN groupadd -g 1000 ubuntu &&\ + useradd -l -u 1000 -g ubuntu ubuntu &&\ + install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu + +RUN echo "PATH="${PATH}"" >> /etc/environment +USER ubuntu + +RUN cd /tmp &&\ + curl -sSL -o ibm-aspera-connect_4.1.3.93_linux.tar.gz --retry 10 https://d3gcli72yxqn2z.cloudfront.net/connect_latest/v4/bin/ibm-aspera-connect_4.1.3.93_linux.tar.gz &&\ + tar -zxvf ibm-aspera-connect_4.1.3.93_linux.tar.gz && \ + chmod 775 ibm-aspera-connect_4.1.3.93_linux.sh && \ + ./ibm-aspera-connect_4.1.3.93_linux.sh + +ENTRYPOINT ["/usr/bin/env"] + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/README.md b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/README.md new file mode 100644 index 0000000..6259821 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/README.md @@ -0,0 +1,54 @@ +# Package download-aspera + +Wrapper for Aspera to download files + + +## Inputs + +See contents of `param-file` + + +## Outputs + +Supplied file to be downloaded 
+ + + ## Usage + +#### Contents of Param-file +``` +{ + "target_file" : "/aspera-test-dir-large/100MB", ### File to be download + "EGAF" : "EGAF000001", ### Associated EGAF id + "ASCP_SCP_HOST" : "demo.asperasoft.com", ### Host server address + "ASCP_SCP_USER" : "aspera", ### Host provided username + "ASPERA_SCP_PASS" : "demoaspera" ### Host provided password +} +``` + +#### Test run +`nextflow run checker.nf -params-file test-job-aspera.json` + +#### IRL run +``` +nextflow run ../main.nf -params-file test-job-aspera.json +``` + +### Run the package directly + +With inputs prepared, you should be able to run the package directly using the following command. +Please replace the params file with a real one (with all required parameters and input files). Example +params file(s) can be found in the `tests` folder. + +``` +nextflow run icgc-argo/argo-data-submission/download-aspera/main.nf -r download-aspera.v0.1.0 -params-file <your-params-json-file> +``` + +### Import the package as a dependency + +To import this package into another package as a dependency, please follow these steps at the +importing package side: + +1. add this package's URI `github.com/icgc-argo/argo-data-submission/download-aspera@0.1.0` in the `dependencies` list of the `pkg.json` file +2. run `wfpm install` to install the dependency +3. 
add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.0/main.nf` diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf new file mode 100755 index 0000000..a3c39be --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf @@ -0,0 +1,92 @@ +#!/usr/bin/env nextflow + +/* + Copyright (C) 2022, icgc-argo + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + Authors: + Edmund Su + Linda Xiang +*/ + +/********************************************************************/ +/* this block is auto-generated based on info from pkg.json where */ +/* changes can be made if needed, do NOT modify this block manually */ +nextflow.enable.dsl = 2 +version = '0.1.2' + +container = [ + 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.download-aspera' +] +default_container_registry = 'ghcr.io' +/********************************************************************/ + + +// universal params go here +params.container_registry = "" +params.container_version = "" +params.container = "" + +params.cpus = 1 +params.mem = 1 // GB +params.publish_dir = "" // set to empty string will disable publishDir + + +// tool specific parmas go here, add / change as needed +params.target_file='' +params.ega_file_id='' +params.ascp_scp_host='' +params.ascp_scp_user='' +params.aspera_scp_pass='' + +process downloadAspera { + container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}" + publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? 
true : false + errorStrategy 'terminate' + cpus params.cpus + memory "${params.mem} GB" + + input: // input, make update as needed + val target_file + val ega_file_id + val dependency + + output: // output, make update as needed + path "${ega_file_id}/${regexed_file_name}", emit: output_file + + script: + // add and initialize variables here as needed + regexed_file_name=target_file.replaceAll(/^.*\//,'') + """ + mkdir ${ega_file_id} + export ASCP_SCP_HOST=${params.ascp_scp_host} + export ASCP_SCP_USER=${params.ascp_scp_user} + export ASPERA_SCP_PASS=${params.aspera_scp_pass} + python3.6 /tools/main.py \\ + -f ${target_file} \\ + -o ${ega_file_id} \\ + > download.log 2>&1 + """ +} + +// this provides an entry point for this main script, so it can be run directly without clone the repo +// using this command: nextflow run ///.nf -r .v --params-file xxx +workflow { + downloadAspera( + params.target_file, + params.ega_file_id, + true + ) +} \ No newline at end of file diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.py b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.py new file mode 100755 index 0000000..96b9438 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2022, icgc-argo + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + Authors: + Edmund Su +""" + +import os +import sys +import argparse +import subprocess +import errno +import shutil +import random +import string + + +def main(): + """ + Python implementation of tool: download-aspera + + This is auto-generated Python code, please update as needed! + """ + + parser = argparse.ArgumentParser(description='Download files from EGA aspera server') + parser.add_argument('-f', '--file_name', dest="file_name", help="EGA file name", required=True) + parser.add_argument('-o', '--output', dest='output', help="Output file folder", required=True) + results = parser.parse_args() + + file_list = randomword(60)+".txt" + + try: + try: + # Check if ASCP_EGA_HOST environment variable exists: ega host + os.environ['ASCP_SCP_HOST'] + + # Check if ASCP_EGA_USER environment variable exists: ega username + os.environ['ASCP_SCP_USER'] + + # Check if ASPERA_SCP_PASS environment variable exists: ascpera password + os.environ['ASPERA_SCP_PASS'] + except KeyError: + raise KeyError("Global Variable: ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS must exist in the environment.") + + # Raise an error if the output file exists + if os.path.isfile(results.output+"/"+results.file_name): + raise ValueError("Output file already exists") + + # Write the file to be downloaded to the temporary file + with open(file_list, 'w') as f: + f.write(results.file_name) + f.write('\n') + + # Download process + result=subprocess.run(['/home/ubuntu/.aspera/connect/bin/ascp','-k','1','-QTl','100m','--file-list='+file_list,'--partial-file-suffix=PART','--ignore-host-key','--mode=recv','--host='+os.environ['ASCP_SCP_HOST'],'--user='+os.environ['ASCP_SCP_USER'],results.output]) + + if result.returncode==0: + subprocess.run("touch "+results.output+"/DOWNLOAD.SUCCESS",shell=True) + else: + subprocess.run("touch "+results.output+"/DOWNLOAD.FAILURE",shell=True) + + # 
Deletion of temporary elements + os.remove(file_list) + except Exception as err: + print(str(err)) + if os.path.isfile(file_list): + os.remove(file_list) + exit(1) + + + +def randomword(length): + return(''.join(random.choice(string.ascii_lowercase) for i in range(length))) + +if __name__ == "__main__": + main() diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/nextflow.config b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/nextflow.config new file mode 100644 index 0000000..f2cd1e3 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/nextflow.config @@ -0,0 +1,4 @@ +docker { + enabled = true + runOptions = '-u \$(id -u):\$(id -g)' +} diff --git a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/pkg.json b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/pkg.json new file mode 100644 index 0000000..8074466 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/pkg.json @@ -0,0 +1,37 @@ +{ + "name": "download-aspera", + "version": "0.1.2", + "description": "Wrapper to utilize Aspera for EGA downloads", + "main": "main.nf", + "deprecated": false, + "keywords": [ + "bioinformatics", + "seq", + "qc metrics" + ], + "repository": { + "type": "git", + "url": "https://github.com/icgc-argo/argo-data-submission.git" + }, + "container": { + "registries": [ + { + "registry": "ghcr.io", + "type": "docker", + "org": "icgc-argo", + "default": true + } + ] + }, + "dependencies": [], + "devDependencies": [], + "contributors": [ + { + "name": "Edmund Su", + "email": "edmund.su@oicr.on.ca" + } + ], + "license": "GNU Affero General Public License v3", + "bugReport": "https://github.com/icgc-argo/argo-data-submission/issues", + "homepage": "https://github.com/icgc-argo/argo-data-submission#readme" +} \ No newline at end of file diff --git 
a/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/wfpr_modules b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/wfpr_modules new file mode 120000 index 0000000..1cc74ba --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/wfpr_modules @@ -0,0 +1 @@ +../../../../../wfpr_modules \ No newline at end of file