diff --git a/main.nf b/main.nf
index 805c0935..c40b721f 100644
--- a/main.nf
+++ b/main.nf
@@ -380,6 +380,9 @@ workflow pcacv {
 
 workflow single_sample_scrublet {
 
+    include {
+        SC__SCANPY__CLUSTERING_PARAMS;
+    } from './src/scanpy/processes/cluster.nf' params(params)
     include {
         SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE;
     } from './src/scanpy/workflows/single_sample' params(params)
@@ -389,6 +392,10 @@ workflow single_sample_scrublet {
     include {
         ANNOTATE_BY_CELL_METADATA;
     } from './src/utils/workflows/annotateByCellMetadata.nf' params(params)
+    include {
+        clean;
+        MAKE_UNIQUE_FILENAME;
+    } from './src/utils/processes/utils.nf' params(params)
     include {
         PUBLISH as PUBLISH_SINGLE_SAMPLE_SCRUBLET;
     } from './src/utils/workflows/utils.nf' params(params)
@@ -404,19 +411,30 @@ workflow single_sample_scrublet {
     )
     // Annotate the final processed file with doublet information inferred from Scrublet
     ANNOTATE_BY_CELL_METADATA(
-        SCANPY__SINGLE_SAMPLE.out.final_processed_data.map {
-            it -> tuple(it[0], it[1])
-        },
+        SCANPY__SINGLE_SAMPLE.out.final_processed_data,
         SCRUBLET__DOUBLET_REMOVAL.out.doublet_detection.map {
             it -> tuple(it[0], it[1])
         },
         "scrublet"
     )
+
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+
+    MAKE_UNIQUE_FILENAME(
+        ANNOTATE_BY_CELL_METADATA.out,
+        "single_sample_scrublet_annotated",
+        "NULL",
+        clusteringParams.isParameterExplorationModeOn()
+    )
+
     SC__H5AD_TO_LOOM(
         SCANPY__SINGLE_SAMPLE.out.filtered_data.map {
             it -> tuple(it[0], it[1])
         }.join(
-            ANNOTATE_BY_CELL_METADATA.out
+            // Renaming the file is necessary here in order to avoid input collision
+            MAKE_UNIQUE_FILENAME.out.groupTuple().map {
+                it -> tuple(it[0], it[1])
+            }
         )
     )
 
@@ -426,7 +444,7 @@ workflow single_sample_scrublet {
         "SINGLE_SAMPLE_SCRUBLET",
         "loom",
         null,
-        false
+        clusteringParams.isParameterExplorationModeOn()
     )
 
 }
diff --git a/src/scanpy/workflows/single_sample.nf b/src/scanpy/workflows/single_sample.nf
index 2de848cb..a4801793 100644
--- a/src/scanpy/workflows/single_sample.nf
+++ b/src/scanpy/workflows/single_sample.nf
@@ -119,8 +119,8 @@ workflow SINGLE_SAMPLE {
         'SINGLE_SAMPLE.final_output'
     )
 
-    marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
-        it -> tuple(it[0], it[1], null)
+    marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
+        it -> it.size() > 2 ? tuple(it[0], it[1], it[2..(it.size()-1)]) : it // stash params if multiple param exploration mode
     }
 
     // Publishing
diff --git a/src/scrublet/workflows/doublet_removal.nf b/src/scrublet/workflows/doublet_removal.nf
index eb50af71..b87a22d4 100644
--- a/src/scrublet/workflows/doublet_removal.nf
+++ b/src/scrublet/workflows/doublet_removal.nf
@@ -52,8 +52,9 @@ workflow DOUBLET_REMOVAL {
     )
 
     ANNOTATE_BY_CELL_METADATA(
+        // Expects (sampleId, data, stashedParams)
         data.map {
-            it -> tuple(it[0], it[1])
+            it -> tuple(it[0], it[1], it[3])
         },
         SC__SCRUBLET__DOUBLET_DETECTION.out.map {
             it -> tuple(it[0], it[1])
@@ -92,7 +93,9 @@ workflow DOUBLET_REMOVAL {
             it -> tuple(it[0], it[2])
         }.join(
             // Get the h5ad with Scrublet info
-            ANNOTATE_BY_CELL_METADATA.out
+            ANNOTATE_BY_CELL_METADATA.out.map {
+                it -> tuple(it[0], it[1])
+            }
         ).join(
             finalProcessedData.map {
                 // Extract the Scrublet object file
diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf
index bdab3c1b..5ae2dc07 100644
--- a/src/utils/processes/h5adAnnotate.nf
+++ b/src/utils/processes/h5adAnnotate.nf
@@ -32,6 +32,7 @@ process SC__ANNOTATE_BY_CELL_METADATA {
         tuple \
             val(sampleId), \
             path(f), \
+            val(stashedParams), \
             path(metadata)
         // Expects tool name [string || null]
         val(tool)
@@ -39,7 +40,8 @@ process SC__ANNOTATE_BY_CELL_METADATA {
     output:
         tuple \
             val(sampleId), \
-            path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad")
+            path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad"), \
+            val(stashedParams)
 
     script:
         def sampleParams = params.parseConfig(
diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf
index d1d6cf5c..eca5b9dc 100644
--- a/src/utils/processes/h5adToLoom.nf
+++ b/src/utils/processes/h5adToLoom.nf
@@ -21,8 +21,9 @@ process SC__H5AD_TO_LOOM {
             path(data)
 
     output:
-        tuple val(sampleId), \
-        path("${sampleId}.SC__H5AD_TO_LOOM.loom")
+        tuple \
+            val(sampleId), \
+            path("${sampleId}.SC__H5AD_TO_LOOM.loom")
 
     script:
         """
diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf
index 55a26de2..9ed0b912 100644
--- a/src/utils/processes/utils.nf
+++ b/src/utils/processes/utils.nf
@@ -474,6 +474,45 @@ process SC__PUBLISH {
     """
 }
 
+process MAKE_UNIQUE_FILENAME {
+
+    label 'compute_resources__minimal'
+
+    input:
+        tuple \
+            val(tag), \
+            path(f), \
+            val(stashedParams)
+        val(fileOutputSuffix)
+        val(toolName)
+        val(isParameterExplorationModeOn)
+
+    output:
+        tuple \
+            val(tag), \
+            path(outputFileName), \
+            val(stashedParams)
+
+    script:
+        outputFileName = getOutputFileName(
+            params,
+            tag,
+            f,
+            fileOutputSuffix,
+            isParameterExplorationModeOn,
+            stashedParams
+        )
+        /* Avoid cases where the input and output files have identical names:
+           move the input file to a temporary name (tmp), then hard-link it
+           (dereferencing symlinks, ln -L) under the computed output name */
+        """
+        mv $f tmp
+        if [ ! -f "${outputFileName}" ]; then
+            ln -L tmp "${outputFileName}"
+        fi
+        """
+}
+
 
 process COMPRESS_HDF5() {
 
diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf
index 3fceb5e7..cd277870 100644
--- a/src/utils/workflows/annotateByCellMetadata.nf
+++ b/src/utils/workflows/annotateByCellMetadata.nf
@@ -19,7 +19,7 @@ include {
 workflow ANNOTATE_BY_CELL_METADATA {
 
     take:
-        // Expects (sampleId, h5ad) : Channel
+        // Expects (sampleId, h5ad, stashedParams?) : Channel
         data
         // Expects (sampleId, tsv) : (Channel || null)
         metadata
@@ -41,7 +41,8 @@ workflow ANNOTATE_BY_CELL_METADATA {
         if(method == 'aio') {
             out = SC__ANNOTATE_BY_CELL_METADATA(
                 data.map {
-                    it -> tuple(it[0], it[1], file(workflowParams.cellMetaDataFilePath))
+                    // Add NULL as stashedParams if needed
+                    it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL', file(workflowParams.cellMetaDataFilePath)) : tuple(it[0], it[1], it[2], file(workflowParams.cellMetaDataFilePath))
                 },
                 isParamNull(tool) ? 'NULL' : tool
             )
@@ -53,7 +54,10 @@ workflow ANNOTATE_BY_CELL_METADATA {
                 )
             }
             out = SC__ANNOTATE_BY_CELL_METADATA(
-                data.join(metadata),
+                data.map {
+                    // Add NULL as stashedParams if needed
+                    it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL') : it
+                }.combine(metadata, by: 0),
                 isParamNull(tool) ? 'NULL' : tool
             )
         } else {
diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf
index 4e6b26e6..4a217f33 100644
--- a/src/utils/workflows/filterByCellMetadata.nf
+++ b/src/utils/workflows/filterByCellMetadata.nf
@@ -40,7 +40,9 @@ workflow FILTER_BY_CELL_METADATA {
             .from(workflowParams.filters)
             .set{ filters }
         SC__PREPARE_OBS_FILTER(
-            data.combine(filters),
+            data.map {
+                it -> tuple(it[0], it[1])
+            }.combine(filters),
             isParamNull(tool) ? 'NULL' : tool
         )
         out = SC__APPLY_OBS_FILTER(