Skip to content

Commit

Permalink
update download-aspera version
Browse files Browse the repository at this point in the history
  • Loading branch information
lindaxiang committed Aug 19, 2022
1 parent aa488d9 commit 88a29e0
Show file tree
Hide file tree
Showing 15 changed files with 327 additions and 15 deletions.
2 changes: 1 addition & 1 deletion ega-download-wf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ params.pyega3_ega_user=""
params.pyega3_ega_pass=""

include { downloadPyega3 } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-pyega3@0.1.3/main.nf' params([*:params, 'cleanup': false])
include { downloadAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.1/main.nf' params([*:params, 'cleanup': false])
include { downloadAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf' params([*:params, 'cleanup': false])
include { decryptAspera } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/decrypt-aspera@0.1.1/main.nf' params([*:params, 'cleanup': false])

// please update workflow code as needed
Expand Down
2 changes: 1 addition & 1 deletion ega-download-wf/pkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"url": "https://github.com/icgc-argo/argo-data-submission.git"
},
"dependencies": [
"github.com/icgc-argo/argo-data-submission/download-aspera@0.1.1",
"github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2",
"github.com/icgc-argo/argo-data-submission/download-pyega3@0.1.3",
"github.com/icgc-argo/argo-data-submission/decrypt-aspera@0.1.1"
],
Expand Down
1 change: 1 addition & 0 deletions ega-download-wf/tests/input/aspera.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ega_file_id path
2 changes: 1 addition & 1 deletion ega-download-wf/tests/input/pyega3.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ega_file_id path
EGAF00001770106 EGAF00001770106.bam
EGAF00001770106
Binary file removed ega-download-wf/tests/input/test_rg_3.bam
Binary file not shown.
9 changes: 2 additions & 7 deletions ega-download-wf/tests/local-test-job-aspera.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
{
"file_info_tsv" : "tests/input/pyega3.tsv",
"pyega3_ega_user" : "ega-test-data@ebi.ac.uk",
"pyega3_ega_pass" : "egarocks",
"download_mode" : "aspera",
"ascp_scp_host" : "",
"ascp_scp_user" : "",
"aspera_scp_pass" : ""
"file_info_tsv" : "input/aspera.tsv",
"download_mode" : "aspera"
}
7 changes: 2 additions & 5 deletions ega-download-wf/tests/local-test-job-pyega.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
{
"file_info_tsv" : "tests/input/pyega3.tsv",
"file_info_tsv" : "input/pyega3.tsv",
"pyega3_ega_user" : "ega-test-data@ebi.ac.uk",
"pyega3_ega_pass" : "egarocks",
"download_mode" : "pyega3",
"ascp_scp_host" : "",
"ascp_scp_user" : "",
"aspera_scp_pass" : ""
"download_mode" : "pyega3"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.gitignore
.nextflow*
tests
work
outdir
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Image for the download-aspera tool: Python 3 (3.6 on Ubuntu 18.04) plus the
# IBM Aspera Connect client (ascp), installed for the non-root `ubuntu` user.
FROM ubuntu:18.04

LABEL org.opencontainers.image.source="https://github.com/icgc-argo/argo-data-submission" \
      org.opencontainers.image.authors="Edmund Su (edmund.su@oicr.on.ca)" \
      org.opencontainers.image.title="ICGC ARGO Data download image"

# update + install share one layer so a cached, stale package index is never
# reused; the apt lists are removed in the same layer so they do not end up in
# the image. Recommended packages are deliberately still installed (curl needs
# ca-certificates for the HTTPS download below).
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        curl \
        libbz2-dev \
        liblzma-dev \
        libncurses5-dev \
        libtool \
        libz-dev \
        m4 \
        pkg-config \
        python3-dev \
        python3-pip \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# Non-root user with stable uid/gid 1000 and an explicit home directory; the
# Aspera installer below installs into this user's home.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu -d /home/ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

# Docker does not expand "~" in ENV, so the Aspera bin directory is spelled
# out as an absolute path. /tools holds the Python wrapper scripts.
ENV PATH="/home/ubuntu/.aspera/connect/bin:/tools:${PATH}"

# Persist the same PATH for login shells. The inner quotes are escaped so the
# value is written as a single quoted string.
RUN echo "PATH=\"${PATH}\"" >> /etc/environment

USER ubuntu

# Install Ascp (IBM Aspera Connect). The version is a build argument so it can
# be bumped with --build-arg without editing this file.
ARG ASPERA_CONNECT_VERSION=4.1.3.93

# -f makes curl fail on HTTP errors instead of saving an error page as the
# tarball; the archive and installer are removed in the same layer.
RUN cd /tmp && \
    curl -fsSL --retry 10 \
        -o "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" \
        "https://d3gcli72yxqn2z.cloudfront.net/connect_latest/v4/bin/ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" && \
    tar -zxf "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" && \
    chmod 775 "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh" && \
    "./ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh" && \
    rm -f "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.tar.gz" \
          "ibm-aspera-connect_${ASPERA_CONNECT_VERSION}_linux.sh"

# Copied last: the scripts change far more often than the layers above, so
# editing them no longer invalidates the Aspera download layer. COPY creates
# /tools (root-owned, world-readable) if it does not exist.
COPY *.py /tools/

ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Package download-aspera

Wrapper for Aspera to download files


## Inputs

See contents of `param-file`


## Outputs

Supplied file to be downloaded


## Usage

#### Contents of Param-file
```
{
"target_file" : "/aspera-test-dir-large/100MB", ### File to be downloaded
"EGAF" : "EGAF000001", ### Associated EGAF id
"ASCP_SCP_HOST" : "demo.asperasoft.com", ### Host server address
"ASCP_SCP_USER" : "aspera", ### Host provided username
"ASPERA_SCP_PASS" : "demoaspera" ### Host provided password
}
```

#### Test run
`nextflow run checker.nf -params-file test-job-aspera.json`

#### IRL run
```
nextflow run ../main.nf -params-file test-job-aspera.json
```

### Run the package directly

With inputs prepared, you should be able to run the package directly using the following command.
Please replace the params file with a real one (with all required parameters and input files). Example
params file(s) can be found in the `tests` folder.

```
nextflow run icgc-argo/argo-data-submission/download-aspera/main.nf -r download-aspera.v0.1.2 -params-file <your-params-json-file>
```

### Import the package as a dependency

To import this package into another package as a dependency, please follow these steps at the
importing package side:

1. add this package's URI `github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2` in the `dependencies` list of the `pkg.json` file
2. run `wfpm install` to install the dependency
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo/argo-data-submission/download-aspera@0.1.2/main.nf`
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env nextflow

/*
Copyright (C) 2022, icgc-argo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Authors:
Edmund Su
Linda Xiang
*/

/********************************************************************/
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.2'

container = [
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.download-aspera'
]
default_container_registry = 'ghcr.io'
/********************************************************************/


// universal params go here
// container_registry: key into the `container` map; empty falls back to default_container_registry
params.container_registry = ""
// container_version: image tag; empty falls back to the package `version`
params.container_version = ""
// container: full image name override; empty means use the registry lookup
params.container = ""

// resources requested by the downloadAspera process
params.cpus = 1
params.mem = 1  // GB
params.publish_dir = ""  // set to empty string will disable publishDir


// tool specific params go here, add / change as needed
// target_file: file to download (passed to main.py as -f)
params.target_file=''
// ega_file_id: used as the name of the output folder (passed to main.py as -o)
params.ega_file_id=''
// Aspera connection settings, exported to the task environment as
// ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS
params.ascp_scp_host=''
params.ascp_scp_user=''
params.aspera_scp_pass=''

/*
 * Download one file from an Aspera server by running /tools/main.py inside
 * the package container.
 *
 * Inputs:
 *   target_file  - file to download (passed to main.py as -f)
 *   ega_file_id  - id used as the name of the output folder
 *   dependency   - not referenced in the script; presumably only used by
 *                  callers to order this process after another one
 * Output:
 *   output_file  - <ega_file_id>/<basename of target_file>
 *
 * Connection settings come from params.ascp_scp_host, params.ascp_scp_user
 * and params.aspera_scp_pass and are exported as environment variables.
 */
process downloadAspera {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false
  errorStrategy 'terminate'
  cpus params.cpus
  memory "${params.mem} GB"

  input:  // input, make update as needed
    val target_file
    val ega_file_id
    val dependency

  output:  // output, make update as needed
    path "${ega_file_id}/${regexed_file_name}", emit: output_file

  script:
    // Strip everything up to the last '/' to get the downloaded file's name.
    // Intentionally not declared with `def` so the output block can see it.
    regexed_file_name=target_file.replaceAll(/^.*\//,'')

    // Single-quote a value for bash, escaping embedded single quotes, so that
    // spaces and shell metacharacters in hosts, users, passwords or paths are
    // passed through literally instead of being interpreted by the shell.
    def shq = { value -> "'" + "${value}".toString().replace("'", "'\\''") + "'" }
    """
    mkdir ${shq(ega_file_id)}
    export ASCP_SCP_HOST=${shq(params.ascp_scp_host)}
    export ASCP_SCP_USER=${shq(params.ascp_scp_user)}
    export ASPERA_SCP_PASS=${shq(params.aspera_scp_pass)}
    python3.6 /tools/main.py \\
      -f ${shq(target_file)} \\
      -o ${shq(ega_file_id)} \\
      > download.log 2>&1
    """
}

// Default entry point: lets this script be run on its own, without cloning the repo, e.g.
//   nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
// The third argument fills the `dependency` input, which has nothing to wait for here.
workflow {
  downloadAspera(params.target_file, params.ega_file_id, true)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Copyright (C) 2022, icgc-argo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Authors:
Edmund Su
"""

import os
import sys
import argparse
import subprocess
import errno
import shutil
import random
import string


def main():
    """
    Download one file from an Aspera server with ``ascp``.

    Command-line arguments (read from ``sys.argv``):
        -f / --file_name   Path of the file on the Aspera server.
        -o / --output      Local folder the file is downloaded into.

    Environment:
        ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS must all be set.
        Host and user are passed to ascp on the command line; the password is
        not passed explicitly (NOTE(review): presumably ascp reads
        ASPERA_SCP_PASS from the environment itself - confirm).

    Side effects:
        Creates the output folder if needed, runs ascp, and leaves either a
        ``DOWNLOAD.SUCCESS`` or a ``DOWNLOAD.FAILURE`` marker in the output
        folder. The temporary file list is always removed.

    Exit status:
        Returns normally when ascp succeeds. On any error, including a
        non-zero ascp exit code, the error is printed and the process exits
        with status 1.
    """
    # Local import: nothing else in this script needs tempfile.
    import tempfile

    parser = argparse.ArgumentParser(description='Download files from EGA aspera server')
    parser.add_argument('-f', '--file_name', dest="file_name", help="EGA file name", required=True)
    parser.add_argument('-o', '--output', dest='output', help="Output file folder", required=True)
    results = parser.parse_args()

    required_env = ('ASCP_SCP_HOST', 'ASCP_SCP_USER', 'ASPERA_SCP_PASS')
    file_list = None

    def touch(path):
        # Create the marker file (or refresh its timestamp) without a shell.
        with open(path, 'a'):
            os.utime(path, None)

    try:
        # Aspera host, username and password must all be present.
        if any(name not in os.environ for name in required_env):
            raise KeyError("Global Variable: ASCP_SCP_HOST, ASCP_SCP_USER and ASPERA_SCP_PASS must exist in the environment.")

        # The file is expected at <output>/<basename>, so the check must use
        # the basename rather than the full remote path.
        downloaded_path = os.path.join(results.output, os.path.basename(results.file_name))
        if os.path.isfile(downloaded_path):
            raise ValueError("Output file already exists")

        os.makedirs(results.output, exist_ok=True)

        # ascp takes the files to fetch from a list file; mkstemp gives a
        # unique name in the working directory without a race.
        fd, file_list = tempfile.mkstemp(suffix=".txt", dir=os.getcwd())
        with os.fdopen(fd, 'w') as f:
            f.write(results.file_name)
            f.write('\n')

        # Download process
        result = subprocess.run([
            '/home/ubuntu/.aspera/connect/bin/ascp',
            '-k', '1',
            '-QTl', '100m',
            '--file-list=' + file_list,
            '--partial-file-suffix=PART',
            '--ignore-host-key',
            '--mode=recv',
            '--host=' + os.environ['ASCP_SCP_HOST'],
            '--user=' + os.environ['ASCP_SCP_USER'],
            results.output,
        ])

        if result.returncode != 0:
            touch(os.path.join(results.output, "DOWNLOAD.FAILURE"))
            # A failed transfer must not look like success to the caller.
            raise RuntimeError("ascp exited with return code " + str(result.returncode))

        touch(os.path.join(results.output, "DOWNLOAD.SUCCESS"))
    except Exception as err:
        print(str(err))
        sys.exit(1)
    finally:
        # Remove the temporary file list on every path, success or failure.
        if file_list is not None and os.path.isfile(file_list):
            os.remove(file_list)



def randomword(length):
    """Return a string of `length` randomly chosen lowercase ASCII letters."""
    letters = []
    for _ in range(length):
        letters.append(random.choice(string.ascii_lowercase))
    return ''.join(letters)

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Docker settings applied to the processes run with this config.
docker {
    // Execute process tasks inside Docker containers.
    enabled = true
    // Extra `docker run` options: run the container as the uid:gid reported by
    // `id -u` / `id -g`.
    // NOTE(review): presumably the \$ escapes keep $(...) literal so the shell
    // expands it when the docker command is launched - confirm.
    runOptions = '-u \$(id -u):\$(id -g)'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"name": "download-aspera",
"version": "0.1.2",
"description": "Wrapper to utilize Aspera for EGA downloads",
"main": "main.nf",
"deprecated": false,
"keywords": [
"bioinformatics",
"seq",
"qc metrics"
],
"repository": {
"type": "git",
"url": "https://github.com/icgc-argo/argo-data-submission.git"
},
"container": {
"registries": [
{
"registry": "ghcr.io",
"type": "docker",
"org": "icgc-argo",
"default": true
}
]
},
"dependencies": [],
"devDependencies": [],
"contributors": [
{
"name": "Edmund Su",
"email": "edmund.su@oicr.on.ca"
}
],
"license": "GNU Affero General Public License v3",
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues",
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme"
}

0 comments on commit 88a29e0

Please sign in to comment.