diff --git a/docs/yaml_docs/pipeline_integration_yml.md b/docs/yaml_docs/pipeline_integration_yml.md
index 01906760..63cc9466 100644
--- a/docs/yaml_docs/pipeline_integration_yml.md
+++ b/docs/yaml_docs/pipeline_integration_yml.md
@@ -16,30 +16,28 @@ When running the integration workflow, panpipes provides you with a basic `pipel
 
 You can download the different integration pipeline.yml files here:
 - Basic `pipeline.yml` file (not pre-filled) that is generated when calling `panpipes integration config`: [Download here](https://github.com/DendrouLab/panpipes/blob/main/panpipes/panpipes/pipeline_integration/pipeline.yml)
-- `pipeline.yml`for [Integration tutorial](https://panpipes-tutorials.readthedocs.io/en/latest/uni_multi_integration/pipeline_yml.html)
+- `pipeline.yml` for the Integration tutorial: [View and Download here](https://panpipes-tutorials.readthedocs.io/en/latest/uni_multi_integration/pipeline_yml.html)
 
 For more information on functionalities implemented in `panpipes` to read the configuration files, such as reading blocks of parameters and reusing blocks with  `&anchors` and `*scalars`, please check [our documentation](./useful_info_on_yml.md)
 
 ## Compute resources options
 
-- <span class="parameter">resources</span>
-
+<span class="parameter">resources</span><br>
 Computing resources to use, specifically the number of threads used for parallel jobs.
 Specified by the following parameters:
-
-   - <span class="parameter">threads_high</span> `Integer`, Default: 1<br>
-    Number of threads used for high intensity computing tasks. 
-    For each thread, there must be enough memory to load your MuData object which was created in the preprocessing step of 
-the workflow.
+  - <span class="parameter">threads_high</span> `Integer`, Default: 1<br>
+   Number of threads used for high intensity computing tasks. 
+   For each thread, there must be enough memory to load your MuData object which was created in the preprocessing step of 
+   the workflow.
        
-   - <span class="parameter">threads_medium</span> `Integer`, Default: 1<br>
-    Number of threads used for medium intensity computing tasks.
-    For each thread, there must be enough memory to load your mudata and do computationally light tasks.
+  - <span class="parameter">threads_medium</span> `Integer`, Default: 1<br>
+   Number of threads used for medium intensity computing tasks.
+   For each thread, there must be enough memory to load your mudata and do computationally light tasks.
 
   - <span class="parameter">threads_low</span> `Integer`, Default: 1<br>
-  	 Number of threads used for low intensity computing tasks.
-     For each thread, there must be enough memory to load text files and do plotting, requires much less memory than the other two.  
-   - <span class="parameter">threads_gpu</span> `Integer`, Default: 2<br>
+   Number of threads used for low intensity computing tasks.
+   For each thread, there must be enough memory to load text files and do plotting, requires much less memory than the other two.  
+  - <span class="parameter">threads_gpu</span> `Integer`, Default: 2<br>
    Number of cores per gpu used for computing tasks.
    For each thread, there must be enough memory to compute the tasks above. 
 
@@ -47,10 +45,16 @@ the workflow.
   Path to conda environment that should be used to run panpipes.
   Leave blank if running native or your cluster automatically inherits the login node environment
 
+<span class="parameter">queues</span><br>
+Allows for tweaking which queues jobs get submitted to, in case there is a special queue for long jobs, or you have access to a gpu-specific queue.
+The default queue should be specified in your .cgat.yml file.
+Leave blank if you do not want to use any alternative queues.
+  - <span class="parameter">long</span><br>
+  - <span class="parameter">gpu</span><br>
+
 ## Loading and merging data options
 ### Data format
 
-
 <span class="parameter">sample_prefix</span> `String`, Mandatory parameter, Default: test<br>
 Prefix for the sample that comes out of the filtering/ preprocessing steps of the workflow.
 
@@ -60,9 +64,9 @@ Prefix for the sample that comes out of the filtering/ preprocessing steps of th
 
 ## Batch correction
 
-**Unimodal: correct each modality independently**
+**Batch correction is unimodal, meaning each modality is batch corrected independently.**
 
-## RNA modality
+### RNA modality
 
 <span class="parameter">rna:</span> 
   Batch correction for the RNA modality is specified by the following parameters:
@@ -80,7 +84,7 @@ Prefix for the sample that comes out of the filtering/ preprocessing steps of th
 
      The column name of the covariate you want want to batch correct on, if a comma-separated list is specified then all will be used simultaneously.
  
-### Harmony arguments
+#### Harmony arguments
   
 - <span class="parameter">harmony:</span>
     Basic parameters required to run harmony:
@@ -91,14 +95,14 @@ Prefix for the sample that comes out of the filtering/ preprocessing steps of th
 
   For more information on `harmony` check the [harmony documentation](https://portals.broadinstitute.org/harmony/reference/RunHarmony.html)
 
-### BBKNN arguments
+#### BBKNN arguments
 
 - <span class="parameter">bbknn:</span>  
   - <span class="parameter">neighbors_within_batch:</span> `Integer`, Default: 3<br>
 
 For more information on `bbknn` check the [bbknn documentation](https://bbknn.readthedocs.io/en/latest/) 
 
-### SCVI arguments
+#### SCVI arguments
   -  <span class="parameter">scvi</span>: SCVI parameters are specified as
       - <span  class="parameter">exclude_mt_genes:</span> `Boolean`, Default: True<br>
       - <span  class="parameter">exclude_mt_genes:</span> `String`, Default: mt<br>
@@ -134,7 +138,7 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
          
   For more information on `scvi` check the [scvi documentation](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html)
 
-### Find neighbour parameters 
+#### Find neighbour parameters 
 Parameters to compute the connectivity graph on RNA
 
 - <span class="parameter">neighbors:</span> `String`<br>
@@ -152,7 +156,7 @@ Parameters to compute the connectivity graph on RNA
     The method can either be scanpy or hnsw
 
 
-## Protein modality
+### Protein modality
 <span class="parameter">prot:</span> 
   Batch correction for the protein modality is specified by the following parameters:
   
@@ -168,34 +172,33 @@ Parameters to compute the connectivity graph on RNA
 
      The column you want to batch correct on, if a comma-separated list is specified then all will be used simultaneously
  
-### Harmony arguments
+#### Harmony arguments
   
-- <span class="parameter">harmony:</span>
-    Basic parameters required to run harmony:
-   
-    - <span class="parameter">sigma</span> `Float`, Default: 0.1<br>
-    - <span class="parameter">theta</span> `Float`, Default: 1.0<br>
-    - <span class="parameter">npcs</span> `Integer`, Default: 30<br>
+<span class="parameter">harmony</span><br>
+Basic parameters required to run harmony:
 
-  For more information on `harmony` check the [harmony documentation](https://portals.broadinstitute.org/harmony/reference/RunHarmony.html)
+- <span class="parameter">sigma</span> `Float`, Default: 0.1<br>
+- <span class="parameter">theta</span> `Float`, Default: 1.0<br>
+- <span class="parameter">npcs</span> `Integer`, Default: 30<br>
 
-  
-### BBKNN arguments
+For more information on `harmony` check the [harmony documentation](https://portals.broadinstitute.org/harmony/reference/RunHarmony.html)
 
+  
+#### BBKNN arguments
 
-- <span class="parameter">bbknn:</span>  
+<span class="parameter">bbknn</span><br> 
   - <span class="parameter">neighbors_within_batch:</span> `Integer`, Default: 3<br>
 
 For more information on `bbknn` check the [bbknn documentation](https://bbknn.readthedocs.io/en/latest/) 
 
-### Find neighbour parameters 
+#### Find neighbour parameters 
 
 Parameters to compute the connectivity graph on Protein
 
-- <span class="parameter">neighbors:</span> `String`, Default: &prot_neighbors<br>
+<span class="parameter">neighbors</span> `String`, Default: &prot_neighbors<br>
  
-  - <span class="parameter">npcs</span> `Integer`, Default: 30<br>   
-   Number of principal components to calculate for neighbors and Umap
+  - <span class="parameter">npcs</span> `Integer`, Default: 30<br>
+    Number of principal components to calculate for neighbors and Umap
   
   -  <span class="parameter">k</span> `Integer`, Default: 30<br>
   Number of neighbors
@@ -207,7 +210,7 @@ Parameters to compute the connectivity graph on Protein
     The method can either be scanpy or hnsw
 
 
-## ATAC modality 
+### ATAC modality 
 
 <span class="parameter">atac:</span> 
   Batch correction for the ATAC modality is specified by the following parameters:
@@ -216,17 +219,18 @@ Parameters to compute the connectivity graph on Protein
     Defines if you want the batch correction to run. If set to `False`, `PCA` with default parameters is calculated. 
     
   - <span class="parameter">dimred</span> `String`, Default: PCA<br>
-    Defines if which dimensionality reduction to use, PCA or LSI 
+    Defines which dimensionality reduction to use. Available options are PCA and LSI.
     
   - <span class="parameter">tools</span> `String` (comma-separated), Default: harmony<br> 
-    Defines the method used to run batch correction, multiple can be selected.
-    choices: harmony, bbknn
+    Defines the method used to run batch correction.
+    Multiple can be selected by specifying them as a comma-separated string without spaces.
+    Available options are: harmony, bbknn, and combat
        
    - <span class="parameter">column</span> `String` (comma-separated), Default: sample_id<br>
-
-     The column you want to batch correct on, if a comma-separated list is specified then all will be used simultaneously
+     The column you want to batch correct on.
+     If a comma-separated list is provided then all will be used simultaneously.
  
-### Harmony arguments
+#### Harmony arguments
   
 - <span class="parameter">harmony:</span>
     Basic parameters required to run harmony:
@@ -238,23 +242,21 @@ Parameters to compute the connectivity graph on Protein
   For more information on `harmony` check the [harmony documentation](https://portals.broadinstitute.org/harmony/reference/RunHarmony.html)
 
   
-### BBKNN arguments
-
-
+#### BBKNN arguments
 
 - <span class="parameter">bbknn:</span>  
 
   - <span class="parameter">neighbors_within_batch:</span> `Integer`, Default: 3<br>
 
-For more information on `bbknn` check the [bbknn documentation](https://bbknn.readthedocs.io/en/latest/)
+For more information on `bbknn` check the [bbknn documentation](https://bbknn.readthedocs.io/en/latest/).
   
 
-### Find neighbour parameters 
+#### Find neighbour parameters 
 
 - <span class="parameter">neighbors:</span> `String` <br>
  
   - <span class="parameter">npcs</span> `Integer`, Default: 30<br>   
-   Number of principal components to calculate for neighbors and Umap
+   Number of principal components to calculate for neighbors and UMAP.
   
   -  <span class="parameter">k</span> `Integer`, Default: 30<br>
   Number of neighbors
@@ -270,21 +272,22 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
 <span class="parameter">multimodal:</span> 
 
  - <span class="parameter">run</span> `Boolean`, Default: True<br>
- Leave False if you don't want to run multimodal integration 
+ Set to False if you don't want to run multimodal integration 
 
  - <span class="parameter">tools</span> `String`(Comma separated), Default: "WNN"<br>
  Method you want to use to run batch correction. Options include: WNN, totalvi and multiVI. You can specify mutiple methods and they will be run simultaneously. 
 
  - <span class="parameter">column_categorical</span> `String`(Comma separated), Default: sample_id<br>
- This is the column you want to run a batch correction on, multiple can be selected simultaneously. 
+ This is the column you want to run a batch correction on.
+ Multiple columns can be selected simultaneously by providing them as a comma-separated string without spaces.
 
  Extra parameters: 
  
 ### TotalVI arguments
 
-  **totalvi has to run on both rna and protein data**
+  **TotalVI has to run on both rna and protein data**
   
-   These are the basic totalvi parameters required, you can add more if it fits your analysis better. 
+   This is the minimal set of TotalVI parameters required, you can add more if it fits your analysis better. 
 
  
  - <span class="parameter">totalvi:</span> 
@@ -293,8 +296,7 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
    -  <span class="parameter">exclude_mt_genes</span> `Boolean`, Default: True<br>
    -  <span class="parameter">mt_column</span> `String`, Default: mt<br>
    -  <span class="parameter">filter_by_hvg</span> `Boolean`, Default: True<br>
-   
-   To filter manually create a column called prot_outliers in mdata['prot']
+      To filter manually create a column called prot_outliers in mdata['prot']
    
    -  <span class="parameter">filter_prot_outliers</span> `Boolean`, Default: False<br>
    -  <span class="parameter">model_args</span>:
@@ -310,9 +312,10 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
 
   **MultiVI has to run on both rna and atac data**
 
-   These are the basic multivi parameters required, you can add more if it fits your analysis better. 
+   This is the minimal set of MultiVI parameters required, you can add more if it fits your analysis better. 
    
-   By setting lowmen to True it will subset the atac to the top 25k HVF which is recommended to deal with the concatenation of atac and rna on large datasets which at the moment is required by `scvi-tools`. Note that >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac)
+   Setting `lowmem` to True will subset the ATAC data to the top 25k HVF, which is recommended to deal with the concatenation of atac and rna on large datasets, which at the moment is required by `scvi-tools`.
+   Note that >100GB of RAM are required to concatenate ATAC and RNA data with 15k cells and 120k total features (union rna,atac)
 
  -  <span class="parameter">MultiVI:</span>
   
@@ -328,30 +331,29 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
         -  <span class="parameter">max_epochs</span> `Integer`, Default: 500<br>
         -  <span class="parameter">lr</span> `Float`, Default: 0.0001<br>
         -  <span class="parameter">use_gpu</span> `String`, Default: None<br>
-     Leave blank for default str, int and bool.
+           Leave blank for default str, int and bool.
         -  <span class="parameter">train_size</span> `Float`, Default: 0.9<br>
         -  <span class="parameter">validation_size</span> `String`, Default: None<br>
-    Leave blank for default
+           Leave blank for default
         -  <span class="parameter">batch_size</span> `Integer`, Default: 128<br>
         -  <span class="parameter">weight_decay</span> `Float`, Default: 0.001<br>
         -  <span class="parameter">eps</span> `Float`, Default: 1e-08<br>
         -  <span class="parameter">early_stopping</span> `Boolean`, Default: True<br>
         -  <span class="parameter">save_best</span> `Boolean`, Default: True<br>
         -  <span class="parameter">check_val_every_n_epoch</span> `String`, Default: None<br>
-   Leave blank for the default integer
+           Leave blank for the default integer
         -  <span class="parameter">n_steps_kl_warmup</span> `String`, Default: None<br>
-   Leave blank for the default integer
+           Leave blank for the default integer
         -  <span class="parameter">n_epochs_kl_warmup</span> `Integer`, Default: 50<br>
         -  <span class="parameter">adversarial_mixing</span> `Boolean`, Default: True<br>
   -  <span class="parameter">training_plan</span> `String`, Default: None<br>
   
   
-### Mofa
+### Mofa arguments
 
 **Requires at least two modalities, can run with three**
   
-  These are the basic mofa parameters required, you can add more if it fits your analysis better. 
-  
+  This is the minimal set of Mofa parameters required, you can add more if it fits your analysis better. 
 
 -  <span class="parameter">mofa:</span> 
    -  <span class="parameter">modalities</span> `String` (Comma separated), Default: rna,prot,atac<br>
@@ -359,21 +361,23 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
    -  <span class="parameter">n_factors</span> `Integer`, Default: 10<br>
    -  <span class="parameter">n_iterations</span> `Integer`, Default: 1000<br>
    -  <span class="parameter">convergence_mode</span> `String`, Default: fast<br>
-    Choice between fast, medium, and slow
+      Choice between fast, medium, and slow
    -  <span class="parameter">save_parameters</span> `Boolean`, Default: False<br>
    -  <span class="parameter">outfile</span> `String`, Default: `path/to/h5ad/to_save_model_to`<br> 
 
-### WNN
+### WNN arguments
 
 **Requires at least two modalities, can run with three**
 
- These are the basic WNN parameters required, you can add more if it fits your analysis better. 
+ This is the minimal set of WNN parameters required, you can add more if it fits your analysis better.
+ Panpipes uses muon's implementation of WNN. 
  
-- <span class="parameter">WNN:</span> 
+- <span class="parameter">WNN:</span>
+    
   -  <span class="parameter">modalities</span> `String` (Comma separated), Default: rna, prot, atac <br>
   -  <span class="parameter">batch_corrected</span> `String`, Default: None<br>
   
-    Set the modality to one method ("bbknn", "scVI", "harmony", "scanorama"), if left None, a default de novo calculation of neighbours on non-corrected data for that modality using specified parameters
+       Set the modality to one method ("bbknn", "scVI", "harmony", "scanorama"). If left None, neighbours are calculated de novo on non-corrected data for that modality using the specified parameters.
      -  <span class="parameter">rna</span> `String`, Default: None<br>
         Options here include "bbknn" and "harmony"
 
@@ -388,7 +392,7 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
       -  <span class="parameter">atac</span> `String`, Default: *atac_neighbors<br>
 
    - <span class="parameter">n_neighbors</span> `String`, Default: "leave blank"<br>
-  Leave blank to arithmetic mean across modalities neighbors 
+     Leave blank to use the arithmetic mean across modalities' neighbors 
 
    - <span class="parameter">n_bandwidth_neighbors</span> `Integer`, Default: 20<br>
 
@@ -398,11 +402,13 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
 
    - <span class="parameter">low_memory</span> `Boolean`, Default: True<br>
 
-
+### KNN calculation for multimodal analysis
   - <span class="parameter">neighbors:</span> 
       -  <span class="parameter">npcs</span> `Integer`, Default: 30<br>
-
-    The number of principal components to calculate for neighbors and umap. If no correction is applied PCA will be calculated and used to run the UMAP. If harmony is chosen it will use the following components to create a corrected dimensionality reduction 
+         The number of principal components to calculate for neighbors and UMAP.
+         If no correction is applied PCA will be calculated and used to run the UMAP.
+         If harmony is chosen it will use the following components to create a corrected dimensionality reduction.
+    
      -  <span class="parameter">k</span> `Integer`, Default: 30<br>
      -  <span class="parameter">metric</span> `String`, Default: euclidean<br>
    Options include euclidean and cosine
@@ -411,27 +417,31 @@ For more information on `bbknn` check the [bbknn documentation](https://bbknn.re
    Options include scanpy and hnsw
 
 
-### Plot 
+## Plotting parameters 
 
-- <span class="parameter">plotqc:</span> 
-Grouping must be a categorical variable
+- <span class="parameter">plotqc:</span> <br>
    -  <span class="parameter">grouping_var</span> `String`, Default: sample_id<br>
+      Column name(s) of the covariate(s) you want to group the plot on. Must be a categorical variable.
+      Must be provided as a comma-separated String, without spaces.
+   
+Specify other metrics you want to plot on each modality's embedding. One plot per group will be created.
+Use the `mod:variable` notation.
+These can be categorical or numeric variables.
+Any metrics you may want to plot on all modality UMAPs should be listed under `all`.
    -  <span class="parameter">all</span> `String`, Default: rep:receptor_subtype<br>
-
- Any metrics you may want to plot on all modality umaps should be listed under all the modalities
-
    -  <span class="parameter">rna</span> `String`, Default: rna:total_counts<br>
    -  <span class="parameter">prot</span> `String`, Default: prot:total_counts<br>
    -  <span class="parameter">atac</span>  `String`, Default: atac:total_counts<br>
    -  <span class="parameter">multimodal</span> `String`, Default: rna:total_counts<br>
 
+If you want to add any additional plots, simply remove the log file (logs/plot_batch_corrected_umaps.log) and run `panpipes integration make plot_umaps`.
 
-
-### Make final object 
+## Creating the final object 
 
 Leave this final option blank until you have reviewed the results from running `papipes integration make full`. 
 
-This step will produce a mudata object with one layer and one correction per modality, and one multimodal layer. For unimodal integration select the uncorrected version and use "no_correction". 
+This step will produce a mudata object with one layer and one correction per modality, and one multimodal layer.
+For unimodal integration select the uncorrected version and use "no_correction". 
 
 **Then run**`panpipes integration make merge_integration`
 
diff --git a/docs/yaml_docs/pipeline_preprocess_yml.md b/docs/yaml_docs/pipeline_preprocess_yml.md
index cd9cab75..634fbc34 100644
--- a/docs/yaml_docs/pipeline_preprocess_yml.md
+++ b/docs/yaml_docs/pipeline_preprocess_yml.md
@@ -424,9 +424,10 @@ Whether applying scaling or not is still a matter of debate, as stated in the [L
   - <span class="parameter">color_by</span> `String`, Default: sample_id<br>
         Specify the covariate you want to use to color the dimensionality reduction plot.
 
-  - <span class="parameter">dim_remove</span> `TODO`<br>
+  - <span class="parameter">dim_remove</span> `Integer`<br>
         Whether to remove the component(s) associated to technical artifacts.
         For instance, it is common to remove the first LSI component, as it is often associated with batch effects.
+        Specify `1` to remove the first component.
         Leave blank to avoid removing any.
 
 
diff --git a/panpipes/panpipes/pipeline_integration/pipeline.yml b/panpipes/panpipes/pipeline_integration/pipeline.yml
index 0c311093..a3cea274 100644
--- a/panpipes/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/panpipes/pipeline_integration/pipeline.yml
@@ -1,76 +1,59 @@
 # ============================================================
 # Integration workflow Panpipes (pipeline_integration.py)
 # ============================================================
-# written by Charlotte Rich-Griffin, Fabiola Curion
+# This file contains the parameters for the integration workflow.
+# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_integration_yml.html
 
-# compute resource options
-# ------------------------
+
+#--------------------------
+# Compute resources options
+#--------------------------
 resources:
-  # Number of threads used for parallel jobs
-  # this must be enough memory to load your mudata and do computationally intensive tasks
   threads_high: 1
-  # this must be enough memory to load your mudata and do computationally light tasks
   threads_medium: 1
-  # this must be enough memory to load text files and do plotting, requires much less memory than the other two
   threads_low: 1
-  # if you access to a gpu-specific queue, how many gpu threads to request, make sure to edit the queues section below,
-  # so that panpipes can find your gpu queue
+
   threads_gpu: 2
-# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
+
 condaenv:
 
-# allows for tweaking which queues jobs get submitted to, 
-# in case there is a special queue for long jobs or you have access to a gpu-specific queue
-# the default queue should be specified in your .cgat.yml file
-# leave blank if you do not want to use the alternative queues
 queues:
-  long: 
-  gpu:  
+  long:
+  gpu:
+
+# --------------------------------
+# Loading and merging data options
+# --------------------------------
 
-# Start
-# --------------------------
-# either one that exists already with
+# ----------------------------
+# Data format
 sample_prefix: test
-#this is what comes out of the filtering/preprocessing
 preprocessed_obj: ../preprocess/test.h5mu
-# contains layers: raw_counts, logged_counts, and has scaled or logged counts in X 
 
 
-#--------------------------
+#-----------------
 # Batch correction
-# -------------------------
-# unimodal: correct each modality independently
+# ----------------
+# Batch correction is unimodal, meaning each modality is batch corrected independently
+
+# ------------
+# RNA modality
 rna:
-  # True or false depending on whether you want to run batch correction
-  run: True 
-  # what method(s) to use to run batch correction, you can specify multiple 
-  # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no spaces)
+  run: True
   tools: harmony,bbknn,scanorama,scvi
-  # this is the column you want to batch correct on. if you specify a comma separated list, 
-  # they will be all used simultaneosly. 
-  # Specifically all columns specified will be merged into one 'batch' columns.
-  # if you want to test correction for one at a time, 
-  # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, 
-  # integration_by_tissue ...
   column: sample_id 
-  #-----------------------------
-  # Harmony args
-  #-----------------------------
+
+  # Harmony arguments
   harmony:
-  # sigma value, used by Harmony
-    sigma: 0.1 
-  # theta value used by Harmony, default is 1
+    sigma: 0.1
     theta: 1.0
-  # number of pcs, used by Harmony
     npcs: 30
-  #----------------------------
+
   # BBKNN args # https://bbknn.readthedocs.io/en/latest/
-  #-----------------------------
   bbknn:
     neighbors_within_batch: 
-  #-----------------------------
+
   # SCVI args
-  #-----------------------------
   scvi:
     exclude_mt_genes: True
     mt_column: mt
@@ -89,143 +72,81 @@ rna:
         lr_scheduler_metric: 
         lr_patience: 8
         lr_factor: 0.1
-  #----------------------------
-  # find neighbour parameters
-  #-----------------------------
-  # to reuse these params, (for example for WNN) please use anchors (&) and scalars (*) in the relevant place
-  # i.e. &rna_neighbors will be called by *rna_neighbors where referenced
-  neighbors: &rna_neighbors 
-  # number of Principal Components to calculate for neighbours and umap:
-  #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
-  #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
-  # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
-  # note: scvelo default is 30
+
+  # Find neighbour parameters
+  neighbors: &rna_neighbors
     npcs: 30
-    # number of neighbours
     k: 30
-    # metric: euclidean | cosine
     metric: euclidean
-    # scanpy | hnsw (from scvelo)
     method: scanpy
 
-#--------------------------
+# ----------------
+# Protein modality
 prot:
-  # True or false depending on whether you want to run batch correction
-  run: True 
-  # what method(s) to use to run batch correction, you can specify multiple 
-  # choices: harmony,bbknn,combat
+  run: True
   tools: harmony
-  # this is the column you want to batch correct on. if you specify a comma separated list (no spaces), 
-  # they will be all used simultaneosly. if you want to test correction for one at a time, 
-  # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, 
-  # integration_by_tissue ...
   column: sample_id 
-  #----------------------------
+
   # Harmony args
-  #-----------------------------
   harmony:
-  # sigma value, used by Harmony
-    sigma: 0.1 
-  # theta value used by Harmony, default is 1
+    sigma: 0.1
     theta: 1.0
-  # number of pcs, used by Harmony
     npcs: 30
-  #----------------------------
+
   # BBKNN args # https://bbknn.readthedocs.io/en/latest/
-  #-----------------------------
   bbknn:
     neighbors_within_batch: 
-  #----------------------------›
-  # find neighbour parameters
-  #-----------------------------
+
+  # Find neighbour parameters
   neighbors: &prot_neighbors
-    # number of Principal Components to calculate for neighbours and umap:
-    #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
-    #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
-    # note: scvelo default is 30
     npcs: 30
-    # number of neighbours
     k: 30
-    # metric: euclidean | cosine
     metric: euclidean
-    # scanpy | hnsw (from scvelo)
     method: scanpy
-#--------------------------
+
+# -------------
+# ATAC modality
 atac:
-  # True or false depending on whether you want to run batch correction
   run: False
-  # which dimensionality reduction to expect, LSI or PCA
-  dimred: PCA 
-  # what method(s) to use to run batch correction, you can specify multiple 
-  # (comma-seprated string, no spaces)
-  # choices: harmony,bbknn,combat
-  tools: 
-  # this is the column you want to batch correct on. if you specify a comma separated list, 
-  # they will be all used simultaneosly. if you want to test correction for one at a time, 
-  # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, 
-  # integration_by_tissue ...
+  dimred: PCA
+  tools:
   column: sample_id 
-  #----------------------------
+
   # Harmony args
-  #-----------------------------
   harmony:
-  # sigma value, used by Harmony
-    sigma: 0.1 
-  # theta value used by Harmony, default is 1
+    sigma: 0.1
     theta: 1.0
-  # number of pcs, used by Harmony
     npcs: 30
-  #----------------------------
+
   # BBKNN args # https://bbknn.readthedocs.io/en/latest/
-  #-----------------------------
   bbknn:
     neighbors_within_batch: 
-  #----------------------------
-  # find neighbour parameters
-  #-----------------------------
+
+  # Find neighbour parameters
   neighbors: &atac_neighbors
-    # number of Principal Components to calculate for neighbours and umap:
-    #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
-    #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
-    # note: scvelo default is 30
     npcs: 30
-    # number of neighbours
     k: 30
-    # metric: euclidean | cosine
     metric: euclidean
-    # scanpy | hnsw (from scvelo)
     method: scanpy
-#----------------------------------------------
+
+
+#-----------------------
 # multimodal integration
+# ----------------------
 # remember to specify knn graph params in the section "neighbors"
-#----------------------------------------------
 multimodal:
-  # True or false depending on whether you want to run batch correction
-  run: True 
-  # what method(s) to use to run batch correction, you can specify multiple 
-  # choices: totalvi, mofa, MultiVI, WNN
-  # list e.g. below
+  run: True
   tools: 
     - WNN
     - totalvi
     - multiVI
-
-  # this is the column you want to batch correct on. if you specify a comma separated list, 
-  # they will be all used simultaneosly. if you want to test correction for one at a time, 
-  # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, 
-  # integration_by_tissue ...
   column_categorical: sample_id 
-  # extra params:
+
+  # TotalVI arguments
   totalvi:
-    # this is a minimal set of parameters that will be expected
-    # you can add any other param from the tutorials and they will
-    # be parsed alongside the others
-  
-    # totalvi will run on rna and prot
     modalities: rna,prot
     exclude_mt_genes: True
     mt_column: mt
-    # to filter outliers manually create a column called prot_outliers in mdata['prot'].obs
     filter_by_hvg: True
     filter_prot_outliers: False
     model_args: 
@@ -235,148 +156,90 @@ multimodal:
       train_size: 0.9
       early_stopping: True
     training_plan: None
+
+  # MultiVI arguments
   MultiVI:
-    # this is a minimal set of parameters that will be expected
-    # you can add any other param from the tutorials and they will
-    # be parsed alongside the others
-    # leave arguments blank for default
     lowmem: True
-    # Set lowmem to True will subset the atac to the top 25k HVF. 
-    # This is to deal with concatenation of atac,rna on large datasets which at the moment is suboptimally required by scvitools.
-    # >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac)
     model_args:
-      # (default: None)
-      n_hidden :  
-      # (default: None)
-      n_latent :  
-      #(bool,default: True)
-      region_factors : True 
-       #{‘normal’, ‘ln’} (default: 'normal')
+      n_hidden :
+      n_latent :
+      region_factors : True
       latent_distribution : 'normal'
-      #(bool,default: False)
-      deeply_inject_covariates : False 
-      #(bool, default: False)
-      fully_paired : False 
+      deeply_inject_covariates : False
+      fully_paired : False
+
     training_args:
-      #(default: 500)
-      max_epochs : 500 
-      #float (default: 0.0001)
-      lr : 0.0001 
-      #leave blanck for default str | int | bool | None (default: None)
+      max_epochs : 500
+      lr : 0.0001
       use_gpu :
-      # float (default: 0.9)
-      train_size : 0.9 
-      # leave blanck for default, float | None (default: None)
-      validation_size : 
-      # int (default: 128)
+      train_size : 0.9
+      validation_size :
       batch_size : 128
-      #float (default: 0.001)
-      weight_decay : 0.001 
-      #float (default: 1e-08)
-      eps : 1e-08 
-      #bool (default: True)
-      early_stopping : True 
-      #bool (default: True)
+      weight_decay : 0.001
+      eps : 1e-08
+      early_stopping : True
       save_best : True
-       #leave blanck for default int | None (default: None)
       check_val_every_n_epoch :
-      #leave blanck for default int | None (default: None)
-      n_steps_kl_warmup : 
-       # int | None (default: 50)
+      n_steps_kl_warmup :
       n_epochs_kl_warmup : 50
-      #bool (default: True)
-      adversarial_mixing : True 
-       #leave blanck for default dict | None (default: None)
+      adversarial_mixing : True
     training_plan :
+
+  # MOFA arguments
   mofa:
-    # this is a minimal set of parameters that will be expected
-    # you can add any other param from the tutorials and they will
-    # be parsed alongside the others
-    #  (comma-separated string, no spaces)
     modalities: rna,prot,atac
     filter_by_hvg: True
     n_factors: 10
     n_iterations: 1000
-    #pick one among fast, medium, slow
     convergence_mode: fast
     save_parameters: False
-    #if save_parameters True, set the following, otherwise leave blank
     outfile: path/to/h5ad/to_save_model_to
+
+  # WNN arguments
   WNN:
-    # muon implementation of WNN 
-    modalities: rna,prot,atac 
-    # run wnn on batch corrected unimodal data, set each of the modalities you want to use to calc WNN to ONE method.
-    # leave to None and it will default to de novo calculation of neighbours on non corrected data for that modality using specified params 
+    modalities: rna,prot,atac
     batch_corrected:
-      # options are: "bbknn", "scVI", "harmony", "scanorama"
       rna: None
-      # options are "harmony", "bbknn"
       prot: None
-      # options are "harmony"
-      atac: None 
-    # please use anchors (&) and scalars (*) in the relevant place
-    # i.e. &rna_neighbors will be called by *rna_neighbors where referenced
+      atac: None
+
+    # please use anchors (&) and aliases (*) if necessary
     knn:
       rna: *rna_neighbors
       prot: *prot_neighbors
       atac: *atac_neighbors
-    #WNN has its own neighbors search, specify here  
-    n_neighbors: #leave blank and it will default to aritmetic mean across modalities neighbors
+
+    # WNN neighbour search
+    n_neighbors:
     n_bandwidth_neighbors: 20
     n_multineighbors: 200
     metric: 'euclidean'
     low_memory: True
-  
-  ###
-  # neighbours knn calculation for multimodal analysis. 
-  ###
+
+  # KNN calculation for multimodal analysis
   neighbors:
-    # number of Principal Components to calculate for neighbours and umap:
-    #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
-    #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
-    # note: scvelo default is 30
     npcs: 30
-    # number of neighbours
     k: 30
-    # metric: euclidean | cosine
     metric: euclidean
-    # scanpy | hnsw (from scvelo)
     method: scanpy
 
 
-
-#-----------------------------
-# Plot
-#-----------------------------
+#--------------------
+# Plotting parameters
+#--------------------
 plotqc:
-  # grouping var must be a categorical varible, 
-  #  (comma-seprated strings, no spaces)
-  # umaps comparing the integration (one plot per value in the group)
-  # for each batch correction column plus any extras in grouping var
   grouping_var: sample_id
-  # what other metrics do you want to plot on each modalities embedding, (one plot per group)
-  # use mod:variable notation, 
-  # any metrics that you want to plot on all modality umaps go under "all"
-  # these can be categorical or numeric
+
   all: rep:receptor_subtype
   rna: rna:total_counts
   prot: prot:total_counts
   atac:
   multimodal: rna:total_counts
-  # if you want to add any additional plots, just remove the log file logs/plot_batch_corrected_umaps.log
-  # and run panpipes integration make plot_umaps
 
-# ----------------
-# Make final object
-# ----------------
-# Final choices: Leave blank until you have reviewed the results from running
-# panpipes integration make full
-# This step will produce a mudata object with one layer per modality with 
-# one correction per modality and one multimodal layer.
-# Choose the integration results you want to merge in the final object
-# For unimodal integration: to pick the uncorrected version use "no_correction"
-# then run
-# panpipes integration make merge_integration
+
+# -------------------------
+# Creating the final object
+# -------------------------
 final_obj:
   rna:
     include: True
@@ -390,4 +253,3 @@ final_obj:
   multimodal:
     include: True
     bc_choice: totalvi
-