Fixes #303 #311

Open: wants to merge 2 commits into base: develop
28 changes: 23 additions & 5 deletions main.nf
@@ -380,6 +380,9 @@ workflow pcacv {

workflow single_sample_scrublet {

include {
SC__SCANPY__CLUSTERING_PARAMS;
} from './src/scanpy/processes/cluster.nf' params(params)
include {
SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE;
} from './src/scanpy/workflows/single_sample' params(params)
@@ -389,6 +392,10 @@ workflow single_sample_scrublet {
include {
ANNOTATE_BY_CELL_METADATA;
} from './src/utils/workflows/annotateByCellMetadata.nf' params(params)
include {
clean;
MAKE_UNIQUE_FILENAME;
} from './src/utils/processes/utils.nf' params(params)
include {
PUBLISH as PUBLISH_SINGLE_SAMPLE_SCRUBLET;
} from './src/utils/workflows/utils.nf' params(params)
@@ -404,19 +411,30 @@ workflow single_sample_scrublet {
)
// Annotate the final processed file with doublet information inferred from Scrublet
ANNOTATE_BY_CELL_METADATA(
- SCANPY__SINGLE_SAMPLE.out.final_processed_data.map {
- it -> tuple(it[0], it[1])
- },
+ SCANPY__SINGLE_SAMPLE.out.final_processed_data,
SCRUBLET__DOUBLET_REMOVAL.out.doublet_detection.map {
it -> tuple(it[0], it[1])
},
"scrublet"
)

def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )

MAKE_UNIQUE_FILENAME(
ANNOTATE_BY_CELL_METADATA.out,
"single_sample_scrublet_annotated",
"NULL",
clusteringParams.isParameterExplorationModeOn()
)

SC__H5AD_TO_LOOM(
SCANPY__SINGLE_SAMPLE.out.filtered_data.map {
it -> tuple(it[0], it[1])
}.join(
- ANNOTATE_BY_CELL_METADATA.out
+ // Renaming the file is necessary here in order to avoid input collision
+ MAKE_UNIQUE_FILENAME.out.groupTuple().map {
+ it -> tuple(it[0], it[1])
+ }
)
)

@@ -426,7 +444,7 @@ workflow single_sample_scrublet {
"SINGLE_SAMPLE_SCRUBLET",
"loom",
null,
- false
+ clusteringParams.isParameterExplorationModeOn()
)
}
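A note on the groupTuple step above: in parameter exploration mode a single sample can yield several annotated files, and groupTuple collects the items sharing a sampleId into lists so the downstream join sees one item per sample. A minimal standalone sketch, with hypothetical values:

// groupTuple groups channel items by their first element by default
Channel
    .of( ['S1', 'S1.res_0.4.h5ad', 'p1'], ['S1', 'S1.res_0.8.h5ad', 'p2'] )
    .groupTuple()
    .view()  // [S1, [S1.res_0.4.h5ad, S1.res_0.8.h5ad], [p1, p2]]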

4 changes: 2 additions & 2 deletions src/scanpy/workflows/single_sample.nf
@@ -119,8 +119,8 @@ workflow SINGLE_SAMPLE {
'SINGLE_SAMPLE.final_output'
)

- marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
- it -> tuple(it[0], it[1], null)
+ marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map {
+ it -> it.size() > 2 ? tuple(it[0], it[1], it[2..(it.size()-1)]) : it // stash params when in parameter exploration mode
}

// Publishing
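The ternary in this hunk keeps stashed parameters only when the tuple actually carries them; the Groovy range it[2..(it.size()-1)] gathers all trailing fields into a single list. A standalone sketch, with hypothetical values:

Channel
    .of( ['S1', 'S1.h5ad', 'res_0.4', 'res_0.8'], ['S2', 'S2.h5ad'] )
    .map { it -> it.size() > 2 ? tuple(it[0], it[1], it[2..(it.size()-1)]) : it }
    .view()  // [S1, S1.h5ad, [res_0.4, res_0.8]] and [S2, S2.h5ad]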
7 changes: 5 additions & 2 deletions src/scrublet/workflows/doublet_removal.nf
@@ -52,8 +52,9 @@ workflow DOUBLET_REMOVAL {
)

ANNOTATE_BY_CELL_METADATA(
+ // Expects (sampleId, data, stashedParams)
data.map {
- it -> tuple(it[0], it[1])
+ it -> tuple(it[0], it[1], it[3])
},
SC__SCRUBLET__DOUBLET_DETECTION.out.map {
it -> tuple(it[0], it[1])
@@ -92,7 +93,9 @@ workflow DOUBLET_REMOVAL {
it -> tuple(it[0], it[2])
}.join(
// Get the h5ad with Scrublet info
- ANNOTATE_BY_CELL_METADATA.out
+ ANNOTATE_BY_CELL_METADATA.out.map {
+ it -> tuple(it[0], it[1])
+ }
).join(
finalProcessedData.map {
// Extract the Scrublet object file
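For reference on the join used above: join matches channel items on their first element and concatenates the remaining fields, which is why both sides are first mapped down to (sampleId, file) pairs. A sketch with hypothetical values:

Channel
    .of( ['S1', 'S1.scrublet_object.pklz'] )
    .join( Channel.of( ['S1', 'S1.annotated.h5ad'] ) )
    .view()  // [S1, S1.scrublet_object.pklz, S1.annotated.h5ad]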
4 changes: 3 additions & 1 deletion src/utils/processes/h5adAnnotate.nf
@@ -32,14 +32,16 @@ process SC__ANNOTATE_BY_CELL_METADATA {
tuple \
val(sampleId), \
path(f), \
+ val(stashedParams), \
path(metadata)
// Expects tool name [string || null]
val(tool)

output:
tuple \
val(sampleId), \
path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad")
path("${sampleId}.${toolTag}SC__ANNOTATE_BY_CELL_METADATA.h5ad"), \
val(stashedParams)

script:
def sampleParams = params.parseConfig(
5 changes: 3 additions & 2 deletions src/utils/processes/h5adToLoom.nf
@@ -21,8 +21,9 @@ process SC__H5AD_TO_LOOM {
path(data)

output:
- tuple val(sampleId), \
- path("${sampleId}.SC__H5AD_TO_LOOM.loom")
+ tuple \
+ val(sampleId), \
+ path("${sampleId}.SC__H5AD_TO_LOOM.loom")

script:
"""
39 changes: 39 additions & 0 deletions src/utils/processes/utils.nf
@@ -474,6 +474,45 @@ process SC__PUBLISH {
"""
}

process MAKE_UNIQUE_FILENAME {

label 'compute_resources__minimal'

input:
tuple \
val(tag), \
path(f), \
val(stashedParams)
val(fileOutputSuffix)
val(toolName)
val(isParameterExplorationModeOn)

output:
tuple \
val(tag), \
path(outputFileName), \
val(stashedParams)

script:
outputFileName = getOutputFileName(
params,
tag,
f,
fileOutputSuffix,
isParameterExplorationModeOn,
stashedParams
)
/* avoid cases where the input and output files have identical names:
Move the input file to a unique name, then create a link to
the input file */
"""
mv $f tmp
if [ ! -f ${outputFileName} ]; then
ln -L tmp "${outputFileName}"
fi
"""
}

Comment on lines +477 to +515

[Member] Didn't we discuss using the stageAs option from Nextflow for this kind of issue? It seems redundant to use a link and all the moving when Nextflow is able to do this. I know that some functions use the name of the original file to determine some information, but then it seems that maybe this could just be passed as an argument to those processes.

[Contributor, Author] So you would remove this NXF process and use stageAs in the process whose output should be groupTupled?
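For comparison, a minimal sketch of the stageAs alternative discussed in this thread (the process name, staging directories, and script body are hypothetical): staging each input under its own subdirectory lets Nextflow handle the renaming, so no mv/ln step or extra process is needed.

process H5AD_TO_LOOM_STAGED {  // hypothetical variant of SC__H5AD_TO_LOOM

    input:
    tuple \
        val(sampleId), \
        path(rawFilteredData, stageAs: 'raw/*'), \
        path(data, stageAs: 'annotated/*')  // identical basenames no longer collide

    output:
    tuple \
        val(sampleId), \
        path("${sampleId}.SC__H5AD_TO_LOOM.loom")

    script:
    """
    echo "convert ${rawFilteredData} and ${data}"  # placeholder for the real conversion
    """
}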


process COMPRESS_HDF5() {

10 changes: 7 additions & 3 deletions src/utils/workflows/annotateByCellMetadata.nf
@@ -19,7 +19,7 @@ include {
workflow ANNOTATE_BY_CELL_METADATA {

take:
- // Expects (sampleId, h5ad) : Channel
+ // Expects (sampleId, h5ad, stashedParams?) : Channel
data
// Expects (sampleId, tsv) : (Channel || null)
metadata
@@ -41,7 +41,8 @@ workflow ANNOTATE_BY_CELL_METADATA {
if(method == 'aio') {
out = SC__ANNOTATE_BY_CELL_METADATA(
data.map {
- it -> tuple(it[0], it[1], file(workflowParams.cellMetaDataFilePath))
+ // Add NULL as stashedParams if needed
+ it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL', file(workflowParams.cellMetaDataFilePath)) : tuple(it[0], it[1], it[2], file(workflowParams.cellMetaDataFilePath))
},
isParamNull(tool) ? 'NULL' : tool
)
@@ -53,7 +54,10 @@ workflow ANNOTATE_BY_CELL_METADATA {
)
}
out = SC__ANNOTATE_BY_CELL_METADATA(
- data.join(metadata),
+ data.map {
+ // Add NULL as stashedParams if needed
+ it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL') : it
+ }.combine(metadata, by: 0),
isParamNull(tool) ? 'NULL' : tool
)
} else {
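Two details of this hunk, illustrated below with hypothetical values: the map pads 2-tuples with a 'NULL' placeholder so every item has the (sampleId, h5ad, stashedParams) shape, and combine(metadata, by: 0) matches items key-wise like join but emits every matching combination instead of a single match per key.

Channel
    .of( ['S1', 'S1.h5ad'], ['S2', 'S2.h5ad', 'stashedParams'] )
    .map { it -> it.size() == 2 ? tuple(it[0], it[1], 'NULL') : it }
    .combine( Channel.of( ['S1', 'S1.tsv'], ['S2', 'S2.tsv'] ), by: 0 )
    .view()  // [S1, S1.h5ad, NULL, S1.tsv] and [S2, S2.h5ad, stashedParams, S2.tsv]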
4 changes: 3 additions & 1 deletion src/utils/workflows/filterByCellMetadata.nf
@@ -40,7 +40,9 @@ workflow FILTER_BY_CELL_METADATA {
.from(workflowParams.filters)
.set{ filters }
SC__PREPARE_OBS_FILTER(
- data.combine(filters),
+ data.map {
+ it -> tuple(it[0], it[1])
+ }.combine(filters),
isParamNull(tool) ? 'NULL' : tool
)
out = SC__APPLY_OBS_FILTER(
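A closing note on this change: the added map appears to normalize items to (sampleId, file) pairs, dropping any stashed params field before the cross product, since combine without a by: option forms the Cartesian product, fanning each sample out over every filter. A sketch with hypothetical values:

Channel
    .of( tuple('S1', 'S1.h5ad') )
    .combine( Channel.of('doublet_filter', 'mito_filter') )
    .view()  // [S1, S1.h5ad, doublet_filter] and [S1, S1.h5ad, mito_filter]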