-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
aa488d9
commit 88a29e0
Showing
15 changed files
with
327 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ega_file_id path |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
ega_file_id path | ||
EGAF00001770106 EGAF00001770106.bam | ||
EGAF00001770106 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,4 @@ | ||
{ | ||
"file_info_tsv" : "tests/input/pyega3.tsv", | ||
"pyega3_ega_user" : "ega-test-data@ebi.ac.uk", | ||
"pyega3_ega_pass" : "egarocks", | ||
"download_mode" : "aspera", | ||
"ascp_scp_host" : "", | ||
"ascp_scp_user" : "", | ||
"aspera_scp_pass" : "" | ||
"file_info_tsv" : "input/aspera.tsv", | ||
"download_mode" : "aspera" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,6 @@ | ||
{ | ||
"file_info_tsv" : "tests/input/pyega3.tsv", | ||
"file_info_tsv" : "input/pyega3.tsv", | ||
"pyega3_ega_user" : "ega-test-data@ebi.ac.uk", | ||
"pyega3_ega_pass" : "egarocks", | ||
"download_mode" : "pyega3", | ||
"ascp_scp_host" : "", | ||
"ascp_scp_user" : "", | ||
"aspera_scp_pass" : "" | ||
"download_mode" : "pyega3" | ||
} |
5 changes: 5 additions & 0 deletions
5
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/.dockerignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.gitignore | ||
.nextflow* | ||
tests | ||
work | ||
outdir |
34 changes: 34 additions & 0 deletions
34
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
FROM ubuntu:18.04

LABEL org.opencontainers.image.source="https://github.com/icgc-argo/argo-data-submission" \
      org.opencontainers.image.authors="Edmund Su (edmund.su@oicr.on.ca)" \
      org.opencontainers.image.title="ICGC ARGO Data download image"

# OS-level dependencies. `update` and `install` share one layer so a cached,
# stale package index is never reused, and the apt lists are removed in the
# same layer so they do not end up in the image.
# Recommended packages are deliberately left enabled: curl relies on the
# recommended ca-certificates package for HTTPS, and the Aspera installer's
# own needs have not been audited.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        curl \
        libbz2-dev \
        liblzma-dev \
        libncurses5-dev \
        libtool \
        libz-dev \
        m4 \
        pkg-config \
        python3-dev \
        python3-pip \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# Docker does not expand `~` in ENV values, so the Aspera bin directory is
# spelled out as an absolute path. It matches the location main.py invokes
# ascp from (/home/ubuntu/.aspera/connect/bin/ascp).
ENV PATH="/home/ubuntu/.aspera/connect/bin:/tools:${PATH}"

# Unprivileged user that owns the Aspera Connect installation.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu -d /home/ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

# Record the PATH for login sessions; the value is written inside quotes.
RUN echo "PATH=\"${PATH}\"" >> /etc/environment

USER ubuntu

# Install Ascp (IBM Aspera Connect) into the ubuntu user's home directory.
# `curl -f` makes HTTP errors fail the build instead of saving an error page,
# and the downloaded archive and installer are deleted in the same layer.
# NOTE(review): the download is not checksum-verified; pin a sha256 once the
# expected digest has been confirmed.
ARG ASPERA_CONNECT_VERSION=4.1.3.93
RUN cd /tmp && \
    curl -fsSL --retry 10 \
        -o "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" \
        "https://d3gcli72yxqn2z.cloudfront.net/connect_latest/v4/bin/ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" && \
    tar -zxvf "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" && \
    chmod 775 "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh" && \
    "./ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh" && \
    rm -f "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" \
          "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh"

# Tool scripts are copied last so editing them does not invalidate the
# package and Aspera layers above. COPY creates /tools if it does not exist.
COPY *.py /tools/

ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]
54 changes: 54 additions & 0 deletions
54
...dules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Package download-aspera | ||
|
||
Wrapper for Aspera to download files | ||
|
||
|
||
## Inputs | ||
|
||
See contents of `param-file` | ||
|
||
|
||
## Outputs | ||
|
||
Supplied file to be downloaded | ||
|
||
|
||
## Usage | ||
|
||
#### Contents of Param-file | ||
``` | ||
{ | ||
"target_file" : "/aspera-test-dir-large/100MB", ### File to be downloaded | ||
"ega_file_id" : "EGAF000001", ### Associated EGAF id | ||
"ascp_scp_host" : "demo.asperasoft.com", ### Host server address | ||
"ascp_scp_user" : "aspera", ### Host provided username | ||
"aspera_scp_pass" : "demoaspera" ### Host provided password | ||
} | ||
``` | ||
|
||
#### Test run | ||
`nextflow run checker.nf -params-file test-job-aspera.json` | ||
|
||
#### IRL run | ||
``` | ||
nextflow run ../main.nf -params-file test-job-aspera.json | ||
``` | ||
|
||
### Run the package directly | ||
|
||
With inputs prepared, you should be able to run the package directly using the following command. | ||
Please replace the params file with a real one (with all required parameters and input files). Example | ||
params file(s) can be found in the `tests` folder. | ||
|
||
``` | ||
nextflow run icgc-argo/argo-data-submission/download-aspera/main.nf -r download-aspera.v0.1.2 -params-file <your-params-json-file> | ||
``` | ||
|
||
### Import the package as a dependency | ||
|
||
To import this package into another package as a dependency, please follow these steps at the | ||
importing package side: | ||
|
||
1. add this package's URI `github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2` in the `dependencies` list of the `pkg.json` file | ||
2. run `wfpm install` to install the dependency | ||
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf` |
92 changes: 92 additions & 0 deletions
92
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
Copyright (C) 2022, icgc-argo | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
You should have received a copy of the GNU Affero General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
Authors: | ||
Edmund Su | ||
Linda Xiang | ||
*/ | ||
|
||
/********************************************************************/ | ||
/* this block is auto-generated based on info from pkg.json where */ | ||
/* changes can be made if needed, do NOT modify this block manually */ | ||
nextflow.enable.dsl = 2 | ||
version = '0.1.2' | ||
|
||
container = [ | ||
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.download-aspera' | ||
] | ||
default_container_registry = 'ghcr.io' | ||
/********************************************************************/ | ||
|
||
|
||
// universal params go here | ||
params.container_registry = "" | ||
params.container_version = "" | ||
params.container = "" | ||
|
||
params.cpus = 1 | ||
params.mem = 1 // GB | ||
params.publish_dir = "" // set to empty string will disable publishDir | ||
|
||
|
||
// tool specific params go here, add / change as needed | ||
params.target_file='' | ||
params.ega_file_id='' | ||
params.ascp_scp_host='' | ||
params.ascp_scp_user='' | ||
params.aspera_scp_pass='' | ||
|
||
// Downloads one file with the Aspera wrapper script (/tools/main.py) into a
// folder named after the EGA file id, and emits the downloaded file.
process downloadAspera {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false
  errorStrategy 'terminate'
  cpus params.cpus
  memory "${params.mem} GB"

  input:  // input, make update as needed
    val target_file   // remote path of the file to download
    val ega_file_id   // used as the name of the output folder
    val dependency    // not referenced in the script; presumably lets callers order this process after another step

  output:  // output, make update as needed
    path "${ega_file_id}/${regexed_file_name}", emit: output_file

  script:
    // Strip everything up to and including the last '/' to get the bare file name.
    regexed_file_name = target_file.replaceAll(/^.*\//,'')
    // Interpolated values are single-quoted so spaces and shell metacharacters
    // (e.g. in the password) are passed literally instead of being interpreted
    // by the shell. A value that itself contains a single quote is still not
    // supported.
    """
    mkdir -p '${ega_file_id}'
    export ASCP_SCP_HOST='${params.ascp_scp_host}'
    export ASCP_SCP_USER='${params.ascp_scp_user}'
    export ASPERA_SCP_PASS='${params.aspera_scp_pass}'
    python3.6 /tools/main.py \\
      -f '${target_file}' \\
      -o '${ega_file_id}' \\
      > download.log 2>&1
    """
}
|
||
// Entry point for this main script, so it can be run directly without cloning the repo first, e.g.:
//   nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> -params-file xxx
workflow {
  // The literal `true` fills the process's third input (`dependency`).
  downloadAspera(params.target_file, params.ega_file_id, true)
}
92 changes: 92 additions & 0 deletions
92
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
Copyright (C) 2022, icgc-argo | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
You should have received a copy of the GNU Affero General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
Authors: | ||
Edmund Su | ||
""" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import subprocess | ||
import errno | ||
import shutil | ||
import random | ||
import string | ||
|
||
|
||
def main():
    """
    Download one file from an Aspera server with ``ascp``.

    Command-line arguments:
        -f / --file_name  remote path of the file to download (required)
        -o / --output     local folder the file is downloaded into (required)

    Environment:
        ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS must all be set.
        Host and user are passed to ascp on the command line. The password is
        only checked for existence here; presumably ascp reads it from the
        inherited environment.

    Side effects:
        * A temporary file list is written to the current directory and
          always removed before the function ends.
        * ``DOWNLOAD.SUCCESS`` or ``DOWNLOAD.FAILURE`` is created in the
          output folder according to ascp's return code.

    Exit status:
        Returns normally (status 0) only when ascp succeeds. Exits with
        status 1 when a precondition fails, an unexpected error occurs, or
        ascp reports a failure.
    """

    parser = argparse.ArgumentParser(description='Download files from EGA aspera server')
    parser.add_argument('-f', '--file_name', dest="file_name", help="EGA file name", required=True)
    parser.add_argument('-o', '--output', dest='output', help="Output file folder", required=True)
    results = parser.parse_args()

    # Randomly named file list handed to ascp via --file-list.
    file_list = randomword(60)+".txt"
    download_ok = False

    try:
        # Host, user and password must all be present in the environment.
        for variable in ('ASCP_SCP_HOST', 'ASCP_SCP_USER', 'ASPERA_SCP_PASS'):
            if variable not in os.environ:
                raise KeyError("Global Variable: ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS must exist in the environment.")

        # The file lands in the output folder under its base name (the calling
        # Nextflow process expects <output>/<basename>), so that is the path
        # that must not exist yet.
        destination = os.path.join(results.output, os.path.basename(results.file_name))
        if os.path.isfile(destination):
            raise ValueError("Output file already exists")

        # Write the file to be downloaded to the temporary file list.
        with open(file_list, 'w') as f:
            f.write(results.file_name)
            f.write('\n')

        # Download process
        result = subprocess.run([
            '/home/ubuntu/.aspera/connect/bin/ascp',
            '-k', '1',
            '-QTl', '100m',
            '--file-list='+file_list,
            '--partial-file-suffix=PART',
            '--ignore-host-key',
            '--mode=recv',
            '--host='+os.environ['ASCP_SCP_HOST'],
            '--user='+os.environ['ASCP_SCP_USER'],
            results.output,
        ])
        download_ok = result.returncode == 0

        # Create the marker file directly instead of shelling out to `touch`,
        # so the output path is never interpreted by a shell.
        marker = os.path.join(results.output, "DOWNLOAD.SUCCESS" if download_ok else "DOWNLOAD.FAILURE")
        with open(marker, 'a'):
            os.utime(marker, None)
    except Exception as err:
        print(str(err), file=sys.stderr)
        sys.exit(1)
    finally:
        # Deletion of temporary elements, on success and on every error path.
        if os.path.isfile(file_list):
            os.remove(file_list)

    # A failed transfer must not look like a success to the caller.
    if not download_ok:
        sys.exit(1)
|
||
|
||
|
||
def randomword(length):
    """
    Return a random string of ``length`` lowercase ASCII letters.

    Used to build a throw-away file name; the letters come from the
    non-cryptographic ``random`` module.

    :param length: number of characters to generate; 0 yields an empty string
    :return: str of exactly ``length`` characters drawn from ``string.ascii_lowercase``
    """
    return ''.join(random.choices(string.ascii_lowercase, k=length))
|
||
if __name__ == "__main__": | ||
main() |
4 changes: 4 additions & 0 deletions
4
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/nextflow.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
docker { | ||
enabled = true | ||
runOptions = '-u \$(id -u):\$(id -g)' | ||
} |
37 changes: 37 additions & 0 deletions
37
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/pkg.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"name": "download-aspera", | ||
"version": "0.1.2", | ||
"description": "Wrapper to utilize Aspera for EGA downloads", | ||
"main": "main.nf", | ||
"deprecated": false, | ||
"keywords": [ | ||
"bioinformatics", | ||
"seq", | ||
"qc metrics" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/icgc-argo/argo-data-submission.git" | ||
}, | ||
"container": { | ||
"registries": [ | ||
{ | ||
"registry": "ghcr.io", | ||
"type": "docker", | ||
"org": "icgc-argo", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"dependencies": [], | ||
"devDependencies": [], | ||
"contributors": [ | ||
{ | ||
"name": "Edmund Su", | ||
"email": "edmund.su@oicr.on.ca" | ||
} | ||
], | ||
"license": "GNU Affero General Public License v3", | ||
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues", | ||
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme" | ||
} |
1 change: 1 addition & 0 deletions
1
wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/wfpr_modules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../../../wfpr_modules |