Skip to content

Commit

Permalink
Merge pull request #15 from phac-nml/handle-errors
Browse files Browse the repository at this point in the history
Handling Download Errors
  • Loading branch information
emarinier authored Apr 10, 2024
2 parents cc8f996 + 36cec88 commit 03e156a
Show file tree
Hide file tree
Showing 18 changed files with 180 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/linting_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
uses: dawidd6/action-download-artifact@v2
uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3
with:
workflow: linting.yml
workflow_conclusion: completed
Expand All @@ -21,7 +21,7 @@ jobs:
run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT

- name: Post PR comment
uses: marocchino/sticky-pull-request-comment@v2
uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
number: ${{ steps.pr_number.outputs.pr_number }}
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ testing*
*.swp
/.nf-test
/.nf-test.log
ids.csv
ids.csv
4 changes: 4 additions & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ lint:
- .github/workflows/awstest.yml
- .github/workflows/awsfulltest.yml
- CODE_OF_CONDUCT.md
- lib/Utils.groovy
- lib/WorkflowMain.groovy
- lib/NfcoreTemplate.groovy
- lib/WorkflowFetchdatairidanext.groovy
files_unchanged:
- assets/sendmail_template.txt
- assets/email_template.html
Expand Down
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [1.1.0] - 2024-04-10

### Added

- The ability to handle individual download errors. These errors will be reported in `prefetch/failures_report.csv`.

## [1.0.1] - 2024-02-22

Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ Within the `files` section of this JSON file, all of the output paths are relati

An additional example of this file can be found at [tests/data/test1_iridanext.output.json](tests/data/test1_iridanext.output.json).

## Failures

If one or more samples fail to download, the workflow will still attempt to download all other samples in the samplesheet. The samples that fail to download will be reported in a file named `results/prefetch/failures_report.csv`. This CSV file has two columns: `sample` (the name of the sample, matching the input samplesheet) and `error_accession` (the accession that failed to download).

For example:

```
sample,error_accession
ERROR1,SRR999908
ERROR2,SRR999934
```

# Acknowledgements

This pipeline uses code and infrastructure developed and maintained by the [nf-core][nf-core] initiative, and reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
Expand Down
1 change: 1 addition & 0 deletions conf/iridanext.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ iridanext {
validate = true
files {
idkey = "id"
global = ["**/prefetch/failures_report.csv"]
samples = ["**/reads/*.fastq.gz"]
}
}
Expand Down
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ process {
}

withName: SRATOOLS_PREFETCH {
errorStrategy = 'ignore'
maxForks = params.max_jobs_with_network_connections
}

Expand Down
Binary file removed lib/nfcore_external_java_deps.jar
Binary file not shown.
21 changes: 21 additions & 0 deletions modules/local/prefetchchecker/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
 * PREFETCH_CHECKER
 *
 * Writes `failures_report.csv` into the task work directory. The file always
 * starts with the header `sample,error_accession`, followed by one row per
 * entry of `failures`.
 *
 * Input:
 *   failures - a list whose entries are indexable; entry[0] must expose an
 *              `id` property (written to the `sample` column) and entry[1]
 *              is written to the `error_accession` column.
 *
 * Output:
 *   failure_report - path to the generated `failures_report.csv`.
 */
process PREFETCH_CHECKER {
    tag "prefetch_checker"
    label 'process_low'

    input:
    val failures // list of failures

    output:
    path("failures_report.csv"), emit: failure_report

    exec:
    def report = task.workDir.resolve("failures_report.csv")

    report.withWriter { out ->
        // The header row is always written, so an empty `failures` list
        // yields a header-only report.
        out.writeLine("sample,error_accession")

        // One row per failure; iterating an empty list writes nothing.
        failures.each { entry ->
            out.writeLine("${entry[0].id},${entry[1]}")
        }
    }
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ manifest {
description = """IRIDA Next pipeline for fetching data from NCBI"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '1.0.1'
version = '1.1.0'
doi = ''
defaultBranch = 'main'
}
Expand Down
17 changes: 10 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
[tool.black]
[tool.ruff]
line-length = 120
target_version = ["py37", "py38", "py39", "py310"]
target-version = "py38"
select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
cache-dir = "~/.cache/ruff"

[tool.isort]
profile = "black"
known_first_party = ["nf_core"]
multi_line_output = 3
[tool.ruff.isort]
known-first-party = ["nf_core"]

[tool.ruff.per-file-ignores]
"__init__.py" = ["E402", "F401"]
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sratoolsncbisettings/main'
include { SRATOOLS_PREFETCH } from '../../../modules/nf-core/sratools/prefetch/main'
include { PREFETCH_CHECKER } from '../../../modules/local/prefetchchecker/main'
include { SRATOOLS_FASTERQDUMP } from '../../../modules/local/sratools/fasterqdump/main'

//
Expand Down Expand Up @@ -27,6 +28,12 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS {
SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key )
ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first())

fetches = ch_sra_ids.join(SRATOOLS_PREFETCH.out.sra, remainder: true)
failed_fetches = fetches.filter { it[2] == null }
.toList()

PREFETCH_CHECKER (failed_fetches)

//
// Convert the SRA format into one or more compressed FASTQ files.
//
Expand Down
5 changes: 5 additions & 0 deletions tests/data/errorsheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sample,insdc_accession
SAMPLE1,ERR1109373
ERROR1,SRR999908
ERROR2,SRR999934
SAMPLE2,SRR13191702
35 changes: 35 additions & 0 deletions tests/data/prefetch_errors_iridanext.output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"files": {
"global": [
{
"path": "prefetch/failures_report.csv"
}
],
"samples": {
"SAMPLE1": [
{
"path": "reads/ERR1109373_2.fastq.gz"
},
{
"path": "reads/ERR1109373_1.fastq.gz"
},
{
"path": "reads/ERR1109373.fastq.gz"
}
],
"SAMPLE2": [
{
"path": "reads/SRR13191702_2.fastq.gz"
},
{
"path": "reads/SRR13191702_1.fastq.gz"
}
]
}
},
"metadata": {
"samples": {

}
}
}
4 changes: 3 additions & 1 deletion tests/data/test1_iridanext.output.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
{
"files": {
"global": [

{
"path": "prefetch/failures_report.csv"
}
],
"samples": {
"SAMPLE2": [
Expand Down
29 changes: 29 additions & 0 deletions tests/pipelines/fetchdatairidanext.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,33 @@ nextflow_pipeline {
assert path("$launchDir/test1_out/reads/SRR13191702_2.fastq.gz").linesGzip.size() == 364
}
}

// End-to-end run of the pipeline on a samplesheet that mixes downloadable
// accessions (SAMPLE1, SAMPLE2) with accessions expected to fail (ERROR1,
// ERROR2). The run must still succeed, publish reads for the good samples,
// publish no reads for the failed ones, and list the failures in
// `prefetch/failures_report.csv`.
test("integration test with prefetch failures") {

    when {
        params {
            input = "$baseDir/tests/data/errorsheet.csv"
            outdir = "results"
        }
    }

    then {
        assert workflow.success

        // IRIDA Next output file
        assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/prefetch_errors_iridanext.output.json").json

        // Failures report: header plus one row per failed accession.
        // Row order is not asserted, only membership.
        def failureLines = path("$launchDir/results/prefetch/failures_report.csv").readLines()
        assert failureLines.size() == 3
        assert failureLines.contains("sample,error_accession")
        assert failureLines.contains("ERROR1,SRR999908")
        assert failureLines.contains("ERROR2,SRR999934")

        // Output data:
        assert path("$launchDir/results/reads/ERR1109373_1.fastq.gz").linesGzip.size() == 512
        assert path("$launchDir/results/reads/ERR1109373_2.fastq.gz").linesGzip.size() == 512
        assert path("$launchDir/results/reads/SRR13191702_1.fastq.gz").linesGzip.size() == 364
        assert path("$launchDir/results/reads/SRR13191702_2.fastq.gz").linesGzip.size() == 364

        // These files should have failed, and have no output reads:
        assert path("$launchDir/results/reads/SRR999908_1.fastq.gz").exists() == false
        assert path("$launchDir/results/reads/SRR999908_2.fastq.gz").exists() == false
        assert path("$launchDir/results/reads/SRR999934_1.fastq.gz").exists() == false
        assert path("$launchDir/results/reads/SRR999934_2.fastq.gz").exists() == false
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,46 @@ nextflow_workflow {
{ assert workflow.success },
{ assert snapshot(workflow.out).match() }
)

assert path("$launchDir/output").exists()

def lines = path("$launchDir/output/prefetch/failures_report.csv").readLines()
assert lines.size() == 1
assert lines.contains("sample,error_accession")
assert lines.contains("test_single_end,DRR000774").equals(false)
assert lines.contains("test_paired_end,SRR11140744").equals(false)
}
}

// Workflow-level test: feeds four [meta, accession] pairs into the workflow
// and expects the run to succeed while two of them (ERROR1, ERROR2) end up in
// the failures report.
// NOTE(review): per the test name, SRR999908 is presumably the "403" case and
// 'INVALID!!' the malformed-accession case -- confirm against the download logs.
test("Download errors: 403 and invalid") {

when {
workflow {
"""
input[0] = Channel.of(
[[ id:'SAMPLE1', single_end:false ], 'ERR1109373'],
[[ id:'ERROR1', single_end:false ], 'SRR999908'],
[[ id:'ERROR2', single_end:false ], 'INVALID!!'],
[[ id:'SAMPLE2', single_end:false ], 'SRR13191702']
)
input[1] = []
"""
}
params {
outdir = "output"
}
}

then {
// The workflow as a whole must succeed even though some accessions are
// expected to be reported as failures.
assert workflow.success
assert path("$launchDir/output").exists()

// Expect exactly three lines: the header plus one row for each of
// ERROR1 and ERROR2. Membership is asserted, not row order.
def lines = path("$launchDir/output/prefetch/failures_report.csv").readLines()
assert lines.size() == 3
assert lines.contains("sample,error_accession")
assert lines.contains("ERROR1,SRR999908")
assert lines.contains("ERROR2,INVALID!!")
}
}

}

0 comments on commit 03e156a

Please sign in to comment.