Fixes #303 #311

Open: wants to merge 2 commits into base: develop
28 changes: 23 additions & 5 deletions main.nf
@@ -380,6 +380,9 @@ workflow pcacv {

workflow single_sample_scrublet {

include {
SC__SCANPY__CLUSTERING_PARAMS;
} from './src/scanpy/processes/cluster.nf' params(params)
include {
SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE;
} from './src/scanpy/workflows/single_sample' params(params)
@@ -389,6 +392,10 @@ workflow single_sample_scrublet {
include {
ANNOTATE_BY_CELL_METADATA;
} from './src/utils/workflows/annotateByCellMetadata.nf' params(params)
include {
clean;
MAKE_UNIQUE_FILENAME;
} from './src/utils/processes/utils.nf' params(params)
include {
PUBLISH as PUBLISH_SINGLE_SAMPLE_SCRUBLET;
} from './src/utils/workflows/utils.nf' params(params)
@@ -404,19 +411,30 @@ workflow single_sample_scrublet {
)
// Annotate the final processed file with doublet information inferred from Scrublet
ANNOTATE_BY_CELL_METADATA(
- SCANPY__SINGLE_SAMPLE.out.final_processed_data.map {
- it -> tuple(it[0], it[1])
- },
+ SCANPY__SINGLE_SAMPLE.out.final_processed_data,
SCRUBLET__DOUBLET_REMOVAL.out.doublet_detection.map {
it -> tuple(it[0], it[1])
},
"scrublet"
)

def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )

MAKE_UNIQUE_FILENAME(
ANNOTATE_BY_CELL_METADATA.out,
"single_sample_scrublet_annotated",
"NULL",
clusteringParams.isParameterExplorationModeOn()
)

SC__H5AD_TO_LOOM(
SCANPY__SINGLE_SAMPLE.out.filtered_data.map {
it -> tuple(it[0], it[1])
}.join(
- ANNOTATE_BY_CELL_METADATA.out
+ // Renaming the file is necessary here in order to avoid input collision
+ MAKE_UNIQUE_FILENAME.out.groupTuple().map {
+ it -> tuple(it[0], it[1])
+ }
)
)

@@ -426,7 +444,7 @@ workflow single_sample_scrublet {
"SINGLE_SAMPLE_SCRUBLET",
"loom",
null,
- false
+ clusteringParams.isParameterExplorationModeOn()
)
}
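A note on the groupTuple step above: in parameter exploration mode a single sample can yield several annotated files, and groupTuple collects the items sharing a sampleId into lists so the downstream join sees one item per sample. A minimal standalone sketch, with hypothetical values:

// groupTuple groups channel items by their first element by default
Channel
    .of( ['S1', 'S1.res_0.4.h5ad', 'p1'], ['S1', 'S1.res_0.8.h5ad', 'p2'] )
    .groupTuple()
    .view()  // [S1, [S1.res_0.4.h5ad, S1.res_0.8.h5ad], [p1, p2]]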

4 changes: 2 additions & 2 deletions src/scanpy/workflows/single_sample.nf
@@ -119,8 +119,8 @@ workflow SINGLE_SAMPLE {
'SINGLE_SAMPLE.final_output'
)

- marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
- it -> tuple(it[0], it[1], null)
+ marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
+ it -> it.size() > 2 ? tuple(it[0], it[1], it[2..(it.size()-1)]) : it // stash params when in parameter exploration mode
}

// Publishing
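The ternary in this hunk keeps stashed parameters only when the tuple actually carries them; the Groovy range it[2..(it.size()-1)] gathers all trailing fields into a single list. A standalone sketch, with hypothetical values:

Channel
    .of( ['S1', 'S1.h5ad', 'res_0.4', 'res_0.8'], ['S2', 'S2.h5ad'] )
    .map { it -> it.size() > 2 ? tuple(it[0], it[1], it[2..(it.size()-1)]) : it }
    .view()  // [S1, S1.h5ad, [res_0.4, res_0.8]] and [S2, S2.h5ad]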
7 changes: 5 additions & 2 deletions src/scrublet/workflows/doublet_removal.nf
@@ -52,8 +52,9 @@ workflow DOUBLET_REMOVAL {
)

ANNOTATE_BY_CELL_METADATA(
+ // Expects (sampleId, data, stashedParams)
data.map {
- it -> tuple(it[0], it[1])
+ it -> tuple(it[0], it[1], it[3])
},
SC__SCRUBLET__DOUBLET_DETECTION.out.map {
it -> tuple(it[0], it[1])
@@ -92,7 +93,9 @@ workflow DOUBLET_REMOVAL {
it -> tuple(it[0], it[2])
}.join(
// Get the h5ad with Scrublet info
- ANNOTATE_BY_CELL_METADATA.out
+ ANNOTATE_BY_CELL_METADATA.out.map {
+ it -> tuple(it[0], it[1])
+ }
).join(
finalProcessedData.map {
// Extract the Scrublet object file
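For reference on the join used above: join matches channel items on their first element and concatenates the remaining fields, which is why both sides are first mapped down to (sampleId, file) pairs. A sketch with hypothetical values:

Channel
    .of( ['S1', 'S1.scrublet_object.pklz'] )
    .join( Channel.of( ['S1', 'S1.annotated.h5ad'] ) )
    .view()  // [S1, S1.scrublet_object.pklz, S1.annotated.h5ad]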
4 changes: 3 additions & 1 deletion src/utils/processes/h5adAnnotate.nf
@@ -32,14 +32,16 @@ process SC__ANNOTATE_BY_CELL_METADATA {
tuple \
val(sampleId), \
path(f), \
+ val(stashedParams), \
path(metadata)
// Expects tool name [string || null]
val(tool)

output:
tuple \
val(sampleId), \
path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad")
path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad"), \
val(stashedParams)

script:
def sampleParams = params.parseConfig(
5 changes: 3 additions & 2 deletions src/utils/processes/h5adToLoom.nf
@@ -21,8 +21,9 @@ process SC__H5AD_TO_LOOM {
path(data)

output:
- tuple val(sampleId), \
- path("${sampleId}.SC__H5AD_TO_LOOM.loom")
+ tuple \
+ val(sampleId), \
+ path("${sampleId}.SC__H5AD_TO_LOOM.loom")

script:
"""
39 changes: 39 additions & 0 deletions src/utils/processes/utils.nf
@@ -474,6 +474,45 @@ process SC__PUBLISH {
"""
}

process MAKE_UNIQUE_FILENAME {

label 'compute_resources__minimal'

input:
tuple \
val(tag), \
path(f), \
val(stashedParams)
val(fileOutputSuffix)
val(toolName)
val(isParameterExplorationModeOn)

output:
tuple \
val(tag), \
path(outputFileName), \
val(stashedParams)

script:
outputFileName = getOutputFileName(
params,
tag,
f,
fileOutputSuffix,
isParameterExplorationModeOn,
stashedParams
)
/* avoid cases where the input and output files have identical names:
Move the input file to a unique name, then create a link to
the input file */
"""
mv $f tmp
if [ ! -f ${outputFileName} ]; then
ln -L tmp "${outputFileName}"
fi
"""
}

Comment on lines +477 to +515

[Member] Didn't we discuss using the stageAs option from Nextflow for this kind of issue? It seems redundant to use a link and all the moving when Nextflow is able to do this. I know that some functions use the name of the original file to determine some information, but then it seems that maybe this could just be passed as an argument to those processes.

[Contributor, Author] So you would remove this NXF process and use stageAs in the process whose output should be groupTupled?
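For comparison, a minimal sketch of the stageAs alternative discussed in this thread (the process name, staging directories, and script body are hypothetical): staging each input under its own subdirectory lets Nextflow handle the renaming, so no mv/ln step or extra process is needed.

process H5AD_TO_LOOM_STAGED {  // hypothetical variant of SC__H5AD_TO_LOOM

    input:
    tuple \
        val(sampleId), \
        path(rawFilteredData, stageAs: 'raw/*'), \
        path(data, stageAs: 'annotated/*')  // identical basenames no longer collide

    output:
    tuple \
        val(sampleId), \
        path("${sampleId}.SC__H5AD_TO_LOOM.loom")

    script:
    """
    echo "convert ${rawFilteredData} and ${data}"  # placeholder for the real conversion
    """
}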


process COMPRESS_HDF5() {

10 changes: 7 additions & 3 deletions src/utils/workflows/annotateByCellMetadata.nf
@@ -19,7 +19,7 @@ include {
workflow ANNOTATE_BY_CELL_METADATA {

take:
- // Expects (sampleId, h5ad) : Channel
+ // Expects (sampleId, h5ad, stashedParams?) : Channel
data
// Expects (sampleId, tsv) : (Channel || null)
metadata
@@ -41,7 +41,8 @@ workflow ANNOTATE_BY_CELL_METADATA {
if(method == 'aio') {
out = SC__ANNOTATE_BY_CELL_METADATA(
data.map {
- it -> tuple(it[0], it[1], file(workflowParams.cellMetaDataFilePath))
+ // Add NULL as stashedParams if needed
+ it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL', file(workflowParams.cellMetaDataFilePath)) : tuple(it[0], it[1], it[2], file(workflowParams.cellMetaDataFilePath))
},
isParamNull(tool) ? 'NULL' : tool
)
@@ -53,7 +54,10 @@ workflow ANNOTATE_BY_CELL_METADATA {
)
}
out = SC__ANNOTATE_BY_CELL_METADATA(
- data.join(metadata),
+ data.map {
+ // Add NULL as stashedParams if needed
+ it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL') : it
+ }.combine(metadata, by: 0),
isParamNull(tool) ? 'NULL' : tool
)
} else {
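Two details of this hunk, illustrated below with hypothetical values: the map pads 2-tuples with a 'NULL' placeholder so every item has the (sampleId, h5ad, stashedParams) shape, and combine(metadata, by: 0) matches items key-wise like join but emits every matching combination instead of a single match per key.

Channel
    .of( ['S1', 'S1.h5ad'], ['S2', 'S2.h5ad', 'stashedParams'] )
    .map { it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL') : it }
    .combine( Channel.of( ['S1', 'S1.tsv'], ['S2', 'S2.tsv'] ), by: 0 )
    .view()  // [S1, S1.h5ad, NULL, S1.tsv] and [S2, S2.h5ad, stashedParams, S2.tsv]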
4 changes: 3 additions & 1 deletion src/utils/workflows/filterByCellMetadata.nf
@@ -40,7 +40,9 @@ workflow FILTER_BY_CELL_METADATA {
.from(workflowParams.filters)
.set{ filters }
SC__PREPARE_OBS_FILTER(
- data.combine(filters),
+ data.map {
+ it -> tuple(it[0], it[1])
+ }.combine(filters),
isParamNull(tool) ? 'NULL' : tool
)
out = SC__APPLY_OBS_FILTER(
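A closing note on this change: the added map appears to normalize items to (sampleId, file) pairs, dropping any stashed params field before the cross product, since combine without a by: option forms the Cartesian product, fanning each sample out over every filter. A sketch with hypothetical values:

Channel
    .of( tuple('S1', 'S1.h5ad') )
    .combine( Channel.of('doublet_filter', 'mito_filter') )
    .view()  // [S1, S1.h5ad, doublet_filter] and [S1, S1.h5ad, mito_filter]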