-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpipeline.yml
81 lines (58 loc) · 3.82 KB
/
pipeline.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# ----------------------- #
# Spatial QC pipeline
# ----------------------- #
# Written by Fabiola Curion, Sarah Ouologuem
# This pipeline needs a sample metadata file (see resources for an example)
# Will run:
# scanpy QC
# summary QC plots
# Followed by preprocess pipeline.
# The qc_spatial pipeline does not perform any filtering of cells or genes.
# Filtering happens as the first step in the preprocess pipeline. See pipeline_preprocess_spatial for details
# ---------------------------
# 0. Compute resource options
# ---------------------------
resources:
  # Number of threads used for parallel jobs.
  # These keys must stay indented under `resources`: without the indentation
  # `resources` parses as null and the thread settings become top-level keys.
  threads_high: 2  # Must be enough memory to load all input files and create MuDatas
  threads_medium: 1  # Must be enough memory to load your mudata and do computationally light tasks
  threads_low: 1  # Must be enough memory to load text files and do plotting, requires much less memory

# Path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:
# ------------------------------------------------------------------------------------------------
# 1. Loading options
# ------------------------------------------------------------------------------------------------
project: "test"
# Path to the submission file.
# An example of the expected format is at resources/sample_file_qc_spatial.txt
submission_file: "sample_file_qc_spatial.txt"
# ------------------------------------------------------------------------------------------------
# 2. QC options
# ------------------------------------------------------------------------------------------------
# -------------------------
# Calculate QC metrics
# -------------------------
# This part of the pipeline generates the QC metrics that will be used to evaluate inclusion/exclusion criteria.
# Filtering of cells/genes happens in the preprocess pipeline (pipeline_preprocess_spatial.py).
# Basic QC metrics are always calculated using scanpy.pp.calculate_qc_metrics().
# Additional optional scores can be calculated for 1. cell cycle genes and 2. custom genes:
# 1. Cell cycle scores (optional)
# Leave the option blank to skip this step.
# Accepted values:
#   "default"  - uses panpipes/resources/cell_cycle_genes.tsv
#   <path>     - path to a tsv file with the columns "cc_phase" and "gene_name";
#                "cc_phase" can be either "s" or "g2m"
# The information in the tsv file is used to run scanpy.tl.score_genes_cell_cycle().
ccgenes: "default"
# 2. Custom genes actions (optional)
# It is often practical to rely on known gene lists for a series of tasks, such as
# evaluating mitochondrial or ribosomal genes, or excluding IGG genes from HVG selection.
# Useful gene lists are collected in panpipes/resources/qc_genelist_1.0.csv, and
# "actions" are defined on them as follows:
#   calc_proportions: calculate the proportion of reads mapping to X genes over the total number of reads, per cell
#   score_genes: score the genes using the scanpy.tl.score_genes() function
# Leave the options blank to skip these steps.
# NOTE(review): this comment previously named panpipes/resources/custom_gene_lists_v1.tsv as the
# gene list collection, while the default used below is qc_genelist_1.0.csv - confirm which is current.
#
# Gene list file: "default" (uses panpipes/resources/qc_genelist_1.0.csv) or a path to a
# csv file with the columns "group" and "feature".
custom_genes_file: "default"
# Groups in the custom_genes_file to calculate percentages for (comma-separated, without spaces)
calc_proportions: "hb,mt,rp"
# Groups in the custom_genes_file to run scanpy.tl.score_genes() for (comma-separated, without spaces)
score_genes: "MarkersNeutro"
# ---------------
# Plot QC metrics
# ---------------
# All metrics and grouping variables should be given as a comma-separated string without spaces, e.g. a,b,c
plotqc:
  # The keys below must stay indented under `plotqc`: without the indentation
  # `plotqc` parses as null and these settings become top-level keys.
  # sample_id is the name of each spatial slide (specified in the submission file)
  # and it will always be used to plot.
  grouping_var: sample_id
  # If a metric is present in both .obs and .var, both will be plotted.
  spatial_metrics: total_counts