Add sample_name removal of non-alphanumeric characters and nf-test for sample_name
phac-nml · Nov 14, 2024 · 9e1e92a · 9e1e92a
1 parent b55683d
commit 9e1e92a
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,18 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.2.0]
+
+### Changed
+
+- Modified the input CSV file template to include a `sample_name` column in addition to `sample`, in line with the changes in the [IRIDA-Next update] and as done in the [speciesabundance pipeline]
+  - Special characters in `sample_name` (anything other than letters, digits, `_`, `-` and `.`) will be replaced with `"_"`
+  - If no `sample_name` is supplied in the column, `sample` will be used
+  - To avoid repeated values for `sample_name`, all `sample_name` values will be suffixed with the unique `sample` value from the input file
+
+[IRIDA-Next update]: https://github.com/phac-nml/irida-next/pull/678
+[speciesabundance pipeline]: https://github.com/phac-nml/speciesabundance/pull/24
+
 ## [1.1.1] - 2024-04-19
 
 ### Added

diff --git a/tests/data/add-samplesheet.csv b/tests/data/add-samplesheet.csv
@@ -1,5 +1,5 @@
 sample,sample_name,insdc_accession
-SAMPLE1,S1,ERR1109373
-ERROR1,S2,SRR999908
-ERROR2,S3,SRR999934
-SAMPLE2,S4,SRR13191702
+SAMPLE1,S 1,ERR1109373
+SAMPLE2,S2,ERR1109373
+SAMPLE3,S2,SRR13191702
+SAMPLE4,,SRR13191702
diff --git a/tests/data/samplesheet-addsamplename.csv b/tests/data/samplesheet-addsamplename.csv
diff --git a/tests/workflows/fetchdatairidanext/main.nf.test b/tests/workflows/fetchdatairidanext/main.nf.test
@@ -0,0 +1,36 @@
+// nf-test specification for the FETCHDATAIRIDANEXT workflow.
+// Runs the workflow on a samplesheet that has a `sample_name` column and checks
+// the read file names recorded in the generated iridanext.output.json.
+nextflow_workflow {
+
+    name "Test workflow: workflows/fetchdatairidanext.nf"
+    script "workflows/fetchdatairidanext.nf"
+    workflow "FETCHDATAIRIDANEXT"
+    tag "full workflow"
+    // Tag spelled to match the workflow name so the spec can be selected by it
+    tag "fetchdatairidanext"
+
+    test("Samplesheets with sample_name") {
+        tag "sample_name"
+        when {
+            params {
+                input = "$baseDir/tests/data/add-samplesheet.csv"
+                outdir = "output"
+            }
+        }
+
+        then {
+            assert workflow.success
+            assert path("$launchDir/output").exists()
+
+            // Check that reads have the correct filename when supplying a sample_name
+            assert path("$launchDir/output/iridanext.output.json").exists()
+            def iridanext_json = path("$launchDir/output/iridanext.output.json").json
+            def iridanext_samples = iridanext_json.files.samples
+
+            // SAMPLE1: sample_name "S 1" contains a space, expected as "S_1" in the file names
+            assert iridanext_samples.SAMPLE1 == [['path':'reads/S_1_ERR1109373_2.fastq.gz'], ['path':'reads/S_1_ERR1109373_1.fastq.gz']]
+            // SAMPLE2 and SAMPLE3 share the sample_name "S2"; their file names differ by accession
+            assert iridanext_samples.SAMPLE2 == [['path':'reads/S2_ERR1109373_2.fastq.gz'], ['path':'reads/S2_ERR1109373_1.fastq.gz']]
+            assert iridanext_samples.SAMPLE3 == [['path':'reads/S2_SRR13191702_2.fastq.gz'], ['path':'reads/S2_SRR13191702_1.fastq.gz']]
+            // SAMPLE4: empty sample_name, expected file names carry only the accession
+            assert iridanext_samples.SAMPLE4 == [['path':'reads/SRR13191702_2.fastq.gz'], ['path':'reads/SRR13191702_1.fastq.gz']]
+        }
+    }
+
+}
diff --git a/workflows/fetchdatairidanext.nf b/workflows/fetchdatairidanext.nf
@@ -56,10 +56,19 @@ workflow FETCHDATAIRIDANEXT {
     // Create a new channel of metadata from a sample sheet
     // NB: `input` corresponds to `params.input` and associated sample sheet schema
     input = Channel.fromSamplesheet("input")
-    meta_accessions = input.map {meta -> tuple(["id": meta.id.first(), "irida_id": meta.irida_id.first(), "insdc_accession": meta.insdc_accession.first()], meta.insdc_accession.first())}
+    // Sanitize the sample_name (meta.id) of every samplesheet row and emit a
+    // [meta map, insdc_accession] tuple per row
+        .map { meta ->
+            // meta.id appears to be an immutable list, the workaround is to create a new variable.
+            // Both variables are declared with `def` so they stay local to this closure;
+            // an undeclared variable would be created in the script-wide binding instead.
+            def raw_id = meta.id[0]
+            // Non-alphanumeric characters (excluding _,-,.) will be replaced with "_";
+            // an empty or missing sample_name is passed through unchanged
+            def new_id = raw_id ? raw_id.replaceAll(/[^A-Za-z0-9_\.\-]/, '_') : raw_id
+            return [["id": new_id, "irida_id": meta.irida_id[0], "insdc_accession": meta.insdc_accession[0]], meta.insdc_accession[0]]
+        }
 
     FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
-        ch_sra_ids = meta_accessions,
+        ch_sra_ids = input,
         ch_dbgap_key = []
     )
     ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions)