diff --git a/faqs/galaxy/histories_datasets_vs_collections.md b/faqs/galaxy/histories_datasets_vs_collections.md index 75066db99e67e1..7c7c4b88d942ce 100644 --- a/faqs/galaxy/histories_datasets_vs_collections.md +++ b/faqs/galaxy/histories_datasets_vs_collections.md @@ -25,7 +25,7 @@ There is a number of situations when simple collections are not sufficient to re **Nested collections** -Probably the most common example of this is pared end data when each sample is represented by two files: one containing forward reads and another containing reverse reads. In Galaxy you can create **nested** collection that reflects the hierarchy of the data. In the case of paired data Galaxy supports **paired** collections. +Probably the most common example of this is paired end data when each sample is represented by two files: one containing forward reads and another containing reverse reads. In Galaxy you can create **nested** collection that reflects the hierarchy of the data. In the case of paired data Galaxy supports **paired** collections. 
![A paired collection is a container containing individual datasets and preserving their hierarchy]({% link shared/images/paired_collection.svg %}) diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/Batch-correction-pipeline-overview.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/Batch-correction-pipeline-overview.png new file mode 100644 index 00000000000000..2f92797b3f9461 Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/Batch-correction-pipeline-overview.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-leiden.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-leiden.png new file mode 100644 index 00000000000000..c1e13bb1f5a378 Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-leiden.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-spectral.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-spectral.png new file mode 100644 index 00000000000000..c2a52b297c5d49 Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-spectral.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-no_harmony-spectral.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-no_harmony-spectral.png new file mode 100644 index 00000000000000..ec585e9b34ea23 Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-no_harmony-spectral.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/harmony-graphical-abstract.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/harmony-graphical-abstract.png new file mode 100644 index 00000000000000..e08fda414172f8 Binary files /dev/null and 
b/topics/single-cell/images/scatac-batch-correction-snapatac2/harmony-graphical-abstract.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/mnc-correct-scanorama-umap-spectral.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/mnc-correct-scanorama-umap-spectral.png new file mode 100644 index 00000000000000..d36bdec922684c Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/mnc-correct-scanorama-umap-spectral.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/mnn-graphical-abstract.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/mnn-graphical-abstract.png new file mode 100644 index 00000000000000..9010252dd6d4ea Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/mnn-graphical-abstract.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/scanorama-graphical-abstract.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/scanorama-graphical-abstract.png new file mode 100644 index 00000000000000..9d53f15f94c55b Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/scanorama-graphical-abstract.png differ diff --git a/topics/single-cell/images/scatac-batch-correction-snapatac2/tsse-colon01-colon02.png b/topics/single-cell/images/scatac-batch-correction-snapatac2/tsse-colon01-colon02.png new file mode 100644 index 00000000000000..ac0ac86648defc Binary files /dev/null and b/topics/single-cell/images/scatac-batch-correction-snapatac2/tsse-colon01-colon02.png differ diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/data-library.yaml b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/data-library.yaml new file mode 100644 index 00000000000000..d199795e971643 --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/data-library.yaml @@ -0,0 +1,43 @@ +--- 
+destination: + type: library + name: GTN - Material + description: Galaxy Training Network Material + synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org +items: +- name: The new topic + description: Summary + items: + - name: scATAC-seq standard processing with SnapATAC2 + items: + - name: 'DOI: 10.5281/zenodo.12683310' + description: latest + items: + - url: https://zenodo.org/api/records/12683310/files/colon_multisample_01.gz + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/colon_multisample_02.gz + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/colon_multisample_03.gz + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/colon_multisample_04.gz + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/colon_multisample_05.gz + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/gencode.v46.annotation.gtf.gz/ + src: url + ext: auto + info: https://zenodo.org/records/12683310 + - url: https://zenodo.org/api/records/12683310/files/chrom_sizes.txt/ + src: url + ext: auto + info: https://zenodo.org/records/12683310 \ No newline at end of file diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/faqs/index.md b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/faqs/index.md new file mode 100644 index 00000000000000..9ce3fe4fce824b --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/faqs/index.md @@ -0,0 +1,3 @@ +--- +layout: faq-page +--- diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.bib b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.bib new file mode 100644 index 
00000000000000..574f682b8bff78 --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.bib @@ -0,0 +1,111 @@ + +# This is the bibliography file for your tutorial. +# +# To add bibliography (bibtex) entries here, follow these steps: +# 1) Find the DOI for the article you want to cite +# 2) Go to https://doi2bib.org and fill in the DOI +# 3) Copy the resulting bibtex entry into this file +# +# To cite the example below, in your tutorial.md file +# use {% cite Batut2018 %} +# +# If you want to cite an online resourse (website etc) +# you can use the 'online' format (see below) +# +# You can remove the examples below + +@article{Batut2018, + doi = {10.1016/j.cels.2018.05.012}, + url = {https://doi.org/10.1016/j.cels.2018.05.012}, + year = {2018}, + month = jun, + publisher = {Elsevier {BV}}, + volume = {6}, + number = {6}, + pages = {752--758.e1}, + author = {B{\'{e}}r{\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and + Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\'{e}}guen and Martin {\v{C}}ech and John Chilton + and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and + Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\`{e}}re and + Gildas Le Corguill{\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\'{i}}rez and Devon Ryan and + Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and + Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\"{o}rn Gr\"{u}ning}, + title = {Community-Driven Data Analysis Training for Biology}, + journal = {Cell Systems} +} +@article{Korsunsky2019, + title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, + volume = {16}, + ISSN = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-019-0619-0}, + DOI = 
{10.1038/s41592-019-0619-0}, + number = {12}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Korsunsky, Ilya and Millard, Nghia and Fan, Jean and Slowikowski, Kamil and Zhang, Fan and Wei, Kevin and Baglaenko, Yuriy and Brenner, Michael and Loh, Po-ru and Raychaudhuri, Soumya}, + year = {2019}, + month = nov, + pages = {1289–1296} +} +@article{Hie2019, + title = {Efficient integration of heterogeneous single-cell transcriptomes using Scanorama}, + volume = {37}, + ISSN = {1546-1696}, + url = {http://dx.doi.org/10.1038/s41587-019-0113-3}, + DOI = {10.1038/s41587-019-0113-3}, + number = {6}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Hie, Brian and Bryson, Bryan and Berger, Bonnie}, + year = {2019}, + month = may, + pages = {685–691} +} +@article{Haghverdi2018, + title = {Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors}, + volume = {36}, + ISSN = {1546-1696}, + url = {http://dx.doi.org/10.1038/nbt.4091}, + DOI = {10.1038/nbt.4091}, + number = {5}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Haghverdi, Laleh and Lun, Aaron T L and Morgan, Michael D and Marioni, John C}, + year = {2018}, + month = apr, + pages = {421–427} +} +@article{Luecken2021, + title = {Benchmarking atlas-level data integration in single-cell genomics}, + volume = {19}, + ISSN = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-021-01336-8}, + DOI = {10.1038/s41592-021-01336-8}, + number = {1}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Luecken, Malte D. and B\"{u}ttner, M. and Chaichoompu, K. and Danese, A. and Interlandi, M. and Mueller, M. F. and Strobl, D. C. and Zappia, L. and Dugas, M. and Colomé-Tatché, M. 
and Theis, Fabian J.}, + year = {2021}, + month = dec, + pages = {41–50} +} +@article{Zhang2024, + title = {A fast, scalable and versatile tool for analysis of single-cell omics data}, + volume = {21}, + ISSN = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-023-02139-9}, + DOI = {10.1038/s41592-023-02139-9}, + number = {2}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Zhang, Kai and Zemke, Nathan R. and Armand, Ethan J. and Ren, Bing}, + year = {2024}, + month = jan, + pages = {217–227} +} +@online{gtn-website, + author = {GTN community}, + title = {GTN Training Materials: Collection of tutorials developed and maintained by the worldwide Galaxy community}, + url = {https://training.galaxyproject.org}, + urldate = {2021-03-24} +} diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.md b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.md new file mode 100644 index 00000000000000..3aefe85685c0f4 --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/tutorial.md @@ -0,0 +1,530 @@ +--- +layout: tutorial_hands_on + +title: Multi-sample batch correction with Harmony and SnapATAC2 +subtopic: scmultiomics +priority: 3 +level: Intermediate +zenodo_link: https://zenodo.org/records/12683310 +questions: +- Why is batch correction important during the analysis of data from multiple samples? +- How is batch correction performed on single cell ATAC-seq data? +objectives: +- Perform batch correction on a dataset collection of multiple single cell ATAC-seq samples. +- Learn how Harmony and other batch correction algorithms remove batch effects. +time_estimation: 4H +key_points: +- Batch correction is important for the integration of data from multiple experiments +- Batch correction algorithms identify similar cells and move them closer together through appropriate correction vectors. 
+requirements: + - + type: "internal" + topic_name: single-cell + tutorials: + - scatac-preprocessing-tenx + - scatac-standard-processing-snapatac2 +tags: +- 10x +- epigenetics +abbreviations: + scATAC-seq: Single-cell Assay for Transposase-Accessible Chromatin using sequencing + QC: quality control + TSSe: transcription start site enrichment + TSS: transcription start sites + UMAP: Uniform Manifold Approximation and Projection +contributors: +- timonschlegel +gitter: Galaxy-Training-Network/galaxy-single-cell + + +--- + + +# Introduction + + +Performing experiments in replicates is a cornerstone of modern biological science. However, when integrating data from multiple single-cell sequencing experiments, technical confounders might impact the results. +To reduce technical confounders, such as different experimenters, experimental protocols, sequencing lanes or sequencing technologies, batch correction is often beneficial. + +In this tutorial, we will perform batch correction on five datasets of {scATAC-seq} data with the three algorithms *Harmony* ({% cite Korsunsky2019 %}), *Scanorama* ({% cite Hie2019 %}) and the *mutual nearest neighbor-based* ({% cite Haghverdi2018%}) algorithm *MNC-correct* ({% cite Zhang2024 %}). The {scATAC-seq} analysis will be performed with the tool suite [**SnapATAC2**](https://kzhang.org/SnapATAC2/version/2.5/index.html) ({% cite Zhang2024 %}). + +{% snippet topics/single-cell/faqs/single_cell_omics.md %} + +{% snippet faqs/galaxy/tutorial_mode.md %} + +> +> +> This tutorial is significantly based on the ["Multi-sample Pipeline" tutorial](https://kzhang.org/SnapATAC2/version/2.5/tutorials/integration.html) from **SnapATAC2**. +> +> The data analysis is performed with the same tools shown in the tutorial [Single-cell ATAC-seq standard processing with SnapATAC2]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %} ). 
+> That tutorial also explains the steps of the ATAC-seq analysis with SnapATAC2 in more detail. +> We recommend completing that tutorial before continuing with this one. +> +{: .comment} + +> +> +> In this tutorial, we will cover: +> +> 1. TOC +> {:toc} +> +{: .agenda} + +# Data + +In this tutorial we will analyze colon samples from multiple donors, provided by the [SnapATAC2 documentation](https://kzhang.org/SnapATAC2/version/2.5/tutorials/integration.html). The `chrom_sizes` file and the `gene_annotation` file are identical to the previous tutorial [Single-cell ATAC-seq standard processing with SnapATAC2]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %} ). +The five `colon_multisample` files have been generated by the **Cell Ranger ATAC 2.0.0** pipeline from 10X to generate a [*Fragments File*](https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/output/fragments). + + +> Chromosome sizes +> +> - A chromosome sizes file can be generated using the tool {% tool [Compute sequence length](toolshed.g2.bx.psu.edu/repos/devteam/fasta_compute_length/fasta_compute_length/1.0.3) %}. +> - The reference genome can either be selected from cached genomes or uploaded to the galaxy history. +> +{: .details} + +First we will import the datasets into Galaxy. Then we will build a dataset collection, containing all `colon_multisample` datasets. This will make the following analysis steps much simpler. + +{% snippet faqs/galaxy/histories_datasets_vs_collections.md %} + + +## Get data + +> Data Upload +> +> 1. Create a new history for this tutorial +> 2. 
Import the files from [Zenodo]({{ page.zenodo_link }}) or from +> the shared data library +> +> +> ``` +> {{ page.zenodo_link }}/files/colon_multisample_01.gz +> {{ page.zenodo_link }}/files/colon_multisample_02.gz +> {{ page.zenodo_link }}/files/colon_multisample_03.gz +> {{ page.zenodo_link }}/files/colon_multisample_04.gz +> {{ page.zenodo_link }}/files/colon_multisample_05.gz +> {{ page.zenodo_link }}/files/chrom_sizes.txt +> {{ page.zenodo_link }}/files/gencode.v46.annotation.gtf.gz +> ``` +> +> {% snippet faqs/galaxy/datasets_import_via_link.md %} +> +> {% snippet faqs/galaxy/datasets_import_from_data_library.md %} +> +> > Large file sizes! +> > - The `colon_multisample` datasets are quite large. The entire tutorial requires approximately 50 GB of storage. +> > - To reduce storage you can change the {% icon galaxy-gear %} **Upload Configuration** to *Defer dataset resolution* for the upload of the `colon_multisample` datasets. +> > - This will delete the datasets after the first analysis step and helps in reducing storage. +> > - You can also permanently delete datasets, which are no longer required. +> > +> > {% snippet faqs/galaxy/datasets_deleting.md %} +> > +> {: .warning} +> +> 3. Rename the datasets +> - {% icon galaxy-pencil %} **Rename** the file `gencode.v46.annotation.gtf.gz` to `gene_annotation.gtf.gz` +> +> {% snippet faqs/galaxy/datasets_rename.md %} +> +> 4. Check that the datatypes of the `colon_multisample` files are set to `bed` +> +> {% snippet faqs/galaxy/datasets_change_datatype.md datatype="bed" %} +> +> 5. Create a dataset collection with all `colon_multisample` datasets and rename the collection to `Colon Multisample Fragments`. +> +> {% snippet faqs/galaxy/collections_build_list.md name="Colon Multisample Fragments" %} +> +{: .hands_on} + +# SnapATAC2 preprocessing and filtering + +With our data imported and the collection built, we can now begin the {scATAC-seq} data preprocessing with SnapATAC2. 
+ +The first step is importing the datasets into an AnnData object with the tool *pp.import_data*. Next, the {TSSe} will be calculated. The {TSS} serve as a {QC} measurement to selectively filter droplets containing high-quality cells. + +> AnnData format +> +> - The [**AnnData**](https://anndata.readthedocs.io/en/latest/) format was initially developed for the [**Scanpy**](https://scanpy.readthedocs.io/en/stable/index.html) package and is now a widely accepted data format to store annotated data matrices in a space-efficient manner. +> +> ![Anndata format]({% link topics/single-cell/images/scatac-standard-snapatac2/anndata_schema.svg %} "AnnData format stores a count matrix X together with annotations of observations (i.e. cells) obs, variables (i.e. genes) var and unstructured annotations uns.") +> +{: .details} + +> Preprocessing and QC +> +> 1. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Import data fragment files and compute basic QC metrics, using 'pp.import_data'` +> - {% icon param-collection %} *"Fragment file, optionally compressed with gzip or zstd"*: `Colon Multisample Fragments` (Input dataset collection) +> - {% icon param-file %} *"A tabular file containing chromosome names and sizes"*: `chrom_sizes.txt` (Input dataset) +> - *"Number of unique fragments threshold used to filter cells"*: `1000` +> 2. 
{% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Compute the TSS enrichment score (TSSe) for each cell, using 'metrics.tsse'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas` (dataset collection output of **pp.import_data** {% icon tool %}) +> - {% icon param-file %} *"GTF/GFF file containing the gene annotation"*: `gene_annotation` (Input dataset) +> +> 3. Rename the generated collection to `Colon Multisample AnnDatas TSSe`. +> +> 4. {% tool [SnapATAC2 Plotting](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for plotting"*: `Plot the TSS enrichment vs. number of fragments density figure, using 'pl.tsse'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas TSSe` (output of **metrics.tsse** {% icon tool %}) +> 5. {% icon galaxy-eye %} Inspect a few exemplary `.png` outputs of the collection +> +> ![TSSe plots against number of unique fragments]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/tsse-colon01-colon02.png %} "Exemplary plots of TSSe from colon_multisample_01 and colon_multisample_02") +> +> High-quality cells can be identified in the plot of {TSSe} scores against the number of unique fragments for each cell. +> +> > +> > +> > 1. Where are high-quality cells located in a {TSSe} plot? +> > 2. Based on these plots, how should the filter be set? +> > +> > > +> > > +> > > 1. The cells in the upper right are high-quality cells, enriched for {TSS}. Fragments in the lower left represent low-quality cells or empty droplets and should be filtered out. +> > > 2. Setting the minimum {TSSe} to 7.0 will filter out the lowest quality droplets without losing too much data. 
+> > > +> > {: .solution} +> > +> {: .question} +> +{: .hands_on} + +## Filtering the count matrices + +The {TSSe} distributions show that the sample quality differs substantially between batches. In order to retain as much biological data as possible, we need to use a broad filter (f.ex. minimum TSSe = 7.0). + +> Filtering +> +> 1. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas TSSe` (output of **metrics.tsse** {% icon tool %}) +> - *"Minimum TSS enrichemnt score required for a cell to pass filtering"*: `7.0` +> +> 2. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Generate cell by bin count matrix, using 'pp.add_tile_matrix'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas filtered` (output of **pp.filter_cells** {% icon tool %}) +> - *"The size of consecutive genomic regions used to record the counts"*: `5000` +> +> 3. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Perform feature selection, using 'pp.select_features'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas tile_matrix` (output of **pp.add_tile_matrix** {% icon tool %}) +> - *"Number of features to keep"*: `50000` +> +> > Bin size and features +> > +> > - *pp.add_tile_matrix* divides the genome into a specified number of bins, depending on the bin size (f.ex. 5000bp). 
For each bin, the ATAC-seq reads of individual cells are checked to determine if a read is located in the bin. This is counted as a feature and stored under `n_vars` in the AnnData object. +> > - Increasing the bin size greatly reduces compute time at the cost of some biological data. +> > - For this reason, the bin size 5000bp has been selected for the colon datasets. In the [previous tutorial]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %}#feature-selection), the lower bin size of 500bp was chosen, since fewer cells were analyzed. +> > - *pp.select_features* uses the previously identified features to select the most accessible features for further analysis. +> > - The parameter *"Number of features to keep"* determines the upper limit of features which can be selected. +> > - Similarly to the *bin_size*, the *Number of Features to keep* can also impact downstream clustering. This was demonstrated in the [previous tutorial]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %}#feature-selection ). +> {: .details} +> +> 4. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Compute probability of being a doublet using the scrublet algorithm, using 'pp.scrublet'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas features` (output of **pp.select_features** {% icon tool %}) +> +> 5. 
{% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Remove doublets according to the doublet probability or doublet score, using 'pp.filter_doublets'` +> - {% icon param-collection %} *"Annotated data matrix"*: `Colon Multisample AnnDatas scrublet` (output of **pp.scrublet** {% icon tool %}) +> +{: .hands_on} + +# Concatenate the Collection +Before we can continue with the analysis and batch correction, we need to extract the datasets out of the collection and merge them into a single AnnData object. + +## Extracting datasets from a collection +This is achieved in two steps: +1. The first dataset is extracted from the collection using *Extract dataset* +2. Afterwards, the first dataset will be removed from the collection. This is done by extracting element identifiers and filtering the collection with the name of the first dataset. + +> +> - It is also possible to manually remove a dataset from a collection. +> - However, manually removing datasets cannot be implemented in Galaxy workflows. In contrast, the automatic method shown here can be represented in a workflow and therefore scales much better. +{: .comment} + +> Extract datasets +> +> 1. {% tool [Extract element from collection](__EXTRACT_DATASET__) %} with the following parameters: +> - {% icon param-collection %} *"Input List"*: `Colon Multisample AnnDatas filtered_doublets` (output of **pp.filter_doublets** {% icon tool %}) +> - *"How should a dataset be selected?"*: `The first dataset` +> 2. {% tool [Extract element identifiers](toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2) %} with the following parameters: +> - {% icon param-collection %} *"Dataset collection"*: `Colon Multisample AnnDatas filtered_doublets` (output of **pp.filter_doublets** {% icon tool %}) +> +> 3. 
{% tool [Select first](Show beginning1) %} with the following parameters: +> - *"Select first"*: `1` +> - {% icon param-file %} *"from"*: `Element identifiers` (output of **Extract element identifiers** {% icon tool %}) +> 4. {% tool [Filter collection](__FILTER_FROM_FILE__) %} with the following parameters: +> - {% icon param-collection %} *"Input Collection"*: `Colon Multisample AnnDatas filtered_doublets` (output of **pp.filter_doublets** {% icon tool %}) +> - *"How should the elements to remove be determined?"*: `Remove if identifiers are PRESENT in file` +> - {% icon param-file %} *"Filter out identifiers present in"*: `select_first` (output of **Select first** {% icon tool %}) +> 5. {% icon galaxy-pencil %} Rename the filtered collection to `Colon Multisample 02-05` +> +{: .hands_on} + +## Concatenate AnnDatas + +> Concatenate +> +> 1. {% tool [Manipulate AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_manipulate/anndata_manipulate/0.10.3+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Annotated data matrix"*: `colon_multisample_01` (output of **Extract dataset** {% icon tool %}) +> - *"Function to manipulate the object"*: `Concatenate along the observations axis` +> - {% icon param-collection %} *"Annotated data matrix to add"*: `Colon Multisample 02-05` (output of **Filter collection** {% icon tool %}) +> - *"Join method"*: `Intersection of variables` +> - *"Key to add the batch annotation to obs"*: `batch` +> +> > Issues with the concatenation +> > - Depending on the size of the datasets, this operation can take a lot of time. With the `colon_multisample` datasets, the concatenation can run for 1h. +> > - For even larger datasets, the allocated memory of the tool might not be enough and the operation fails. In that case, you will receive the following error message: +> > +> > ``` +> > Fatal error: Exit code 137 () +> > ``` +> > - In such a case, please report the issue. 
The administrators can then increase the memory limit and the job will succeed. +> > +> > {% snippet faqs/galaxy/analysis_troubleshooting_reporting.md %} +> > +> {: .details} +> +> 2. {% icon galaxy-pencil %} Rename the AnnData output to `Multisample AnnData` +> 3. {% icon galaxy-eye %} Inspect the general information of `Multisample AnnData` +> +> > Inspecting AnnData objects +> > * Many toolsets producing outputs in *AnnData* formats in Galaxy, provide the general information by default: +> > * Click on the name of the dataset in the history to expand it. The general Anndata information will be given in the expanded box. +> > * Alternatively, expand the dataset and click on {% icon details %}*Dataset Details*. Scroll to Job Information and inspect the Tool Standard Output. +> > * If a tool does not provide the general AnnData information, or a more specific query is required, the tool {% tool [Inspect AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_inspect/anndata_inspect/0.10.3+galaxy0) %} can also be selected. +> {: .tip} +> +> > ``` +> > AnnData object with n_obs × n_vars = 34372 × 606219 +> > obs: 'n_fragment', 'frac_dup', 'frac_mito', 'tsse', 'doublet_probability', 'doublet_score', 'batch' +> > var: 'count-0', 'selected-0', 'count-1', 'selected-1', 'count-2', 'selected-2', 'count-3', 'selected-3', 'count-4', 'selected-4' +> > obsm: 'fragment_paired' +> > ``` +> > +> +> > +> > +> > 1. How many colon cells are stored in this AnnData object? +> > 2. What does the 'batch' annotation represent? +> > 3. What do the different annotations for 'count-' and 'selected-' stand for? +> > +> > > +> > > +> > > 1. There are 34372 cells. +> > > 2. The cells are marked with the 'batch' annotation according to their sample number (0-4). This annotation will be used later by the batch correction algorithms to produce clusters from multiple samples. +> > > 3. 'count' and 'selected' are variable annotations indicating the detected and selected features. 
The sample number (0-4) specifies the dataset which produced these annotations. +> > > +> > {: .solution} +> {: .question} +> +{: .hands_on} + +# Dimension Reduction +Now that all samples have been concatenated into a single AnnData object, the most accessible features of the combined count matrix must be selected. The previously selected features were dependent on the individual samples and cannot be utilized for dimensionality reduction. Therefore, the most accessible features will be selected once again and then the dimensionality of the data will be reduced through *matrix-free spectral embedding*. + +> Spectral embedding +> +> 1. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Perform feature selection, using 'pp.select_features'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData` (output of **Manipulate AnnData** {% icon tool %}) +> - *"Number of features to keep"*: `50000` +> +> 2. {% tool [SnapATAC2 Clustering](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1) %} with the following parameters: +> - *"Dimension reduction and Clustering"*: `Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData features` (output of **pp.select_features** {% icon tool %}) +> - *"distance metric"*: `cosine` +> +> 3. {% icon galaxy-pencil %} Rename the AnnData output to `Multisample AnnData spectral` +> +{: .hands_on} + +## Control without batch correction +Batch effects can be visualized in a {UMAP} projection of the data. For this, the different samples are colored according to their batch annotation (obs: 'batch'). + +> UMAP projection without batch correction +> +> 1. 
{% tool [SnapATAC2 Clustering](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1) %} with the following parameters: +> - *"Dimension reduction and Clustering"*: `Compute Umap, using 'tl.umap'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData spectral` (output of **tl.spectral** {% icon tool %}) +> +> 2. {% icon galaxy-pencil %} Rename the AnnData output to `Multisample AnnData UMAP` +> 3. {% tool [SnapATAC2 Plotting](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for plotting"*: `Plot the UMAP embedding, using 'pl.umap'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData UMAP` (output of **tl.umap** {% icon tool %}) +> - *"Color"*: `batch` +> - *"Height of the plot"*: `500` +> +> 4. {% icon galaxy-pencil %} Rename the generated image to `spectral-UMAP-No Batch correction` +> +> 5. {% icon galaxy-eye %} Inspect the `.png` output +> +> ![UMAP plot before batch correction]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-no_harmony-spectral.png %}) +> +> > +> > +> > 1. How are batch effects visible in this plot? +> > 2. What would a plot without batch effects look like? +> > +> > > +> > > +> > > 1. Batch effects are visible as parts of the projection where the different samples are not distributed evenly. For example, the large mass in the upper right corner of the plot consists of separate groups for each batch. It is very likely that the differences between these groups are technical artefacts due to batch effects and do not represent "real" biological differences between the cells of different samples. +> > > 2. A plot without batch effects does not exhibit groups of cells from a single sample. Instead, most parts of the projection would consist of evenly distributed cells from all samples.
+> > > +> > {: .solution} +> {: .question} +> +> +{: .hands_on} + +# Batch correction +After confirming that batch effects have affected our samples, we should remove them with correction algorithms. **SnapATAC2** offers three algorithms for batch correction: *Harmony*, *MNC-correct* and *Scanorama*. In order to determine the best-suited algorithm for your specific dataset, the batch correction outputs of different algorithms should be compared. + +> Batch correction algorithms +> - Batch correction algorithms aim to correct the cell-by-feature count matrix against batch-specific differences between samples. +> - Batch effects can arise from many different technical sources: variation in sequencing lanes, plates, protocol, handling. Additionally, biological factors can also be regarded as batch effects, for example tissue types, species and inter-individual variation {% cite Luecken2021 %}. +> - Many different correction algorithms have been developed, although most methods for scATAC-seq have been adapted from scRNA-seq batch removal algorithms. +> - *Harmony* {% cite Korsunsky2019 %} is a principal component analysis (PCA)-based method which utilizes the previously generated lower-dimensional data to assign cells into new clusters. The grouping of cells into clusters favors multi-sample clusters, in order to integrate the datasets. Linear correction factors are calculated for each batch and cluster, and the cells are moved to the corrected positions. The preceding steps are iterated until optimal batch correction is achieved. +> +> ![Graphical abstract of Harmony batch correction]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/harmony-graphical-abstract.png %}) +> +> - *Scanorama* {% cite Hie2019 %} performs panorama stitching to find and merge overlapping cell types.
+> +> ![Graphical abstract of Scanorama batch correction]({% link topics/single-cell/images/scanorama-graphical-abstract.png %}) +> +> - *MNC-correct* {% cite Zhang2024 %} is a modified version of a *mutual nearest neighbor* algorithm {% cite Haghverdi2018 %}. The algorithm calculates centroids for batch-specific clusters and identifies pairs of mutual nearest centroids (MNC) across batches. Correction vectors align the batches in the same plane. Additionally, *MNC-correct* can also be run iteratively to find the optimal corrections. +> +> ![Graphical abstract of MNC-correct batch correction]({% link topics/single-cell/images/mnn-graphical-abstract.png %}) +> +{: .details} + +We will use *Harmony* to correct for batch effects first. The other methods will be tested afterwards. +> Batch correction and visualization +> +> 1. {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for preprocessing"*: `Use harmonypy to integrate different experiments,using 'pp.harmony'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData UMAP` (output of **tl.umap** {% icon tool %}) +> +> 2. {% tool [SnapATAC2 Clustering](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1) %} with the following parameters: +> - *"Dimension reduction and Clustering"*: `Compute Umap, using 'tl.umap'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData harmony` (output of **pp.harmony** {% icon tool %}) +> - *"Use the indicated representation in `.obsm`"*: `X_spectral_harmony` +> - *"`adata.obs` key under which t add cluster labels"*: `umap_harmony` +> +> 3.
{% icon galaxy-pencil %} Rename the AnnData output to `Multisample AnnData harmony UMAP` +> +> > Key for cluster labels +> > - If you add the new *UMAP-embeddings* under the key `umap_harmony`, the non-batch-corrected embeddings are still stored in the AnnData object. +> > - Alternatively, by leaving this parameter empty, the old embeddings will be overwritten. +> {: .comment} +> +> 4. {% tool [SnapATAC2 Plotting](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for plotting"*: `Plot the UMAP embedding, using 'pl.umap'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData harmony UMAP` (output of **tl.umap** {% icon tool %}) +> - *"Color"*: `batch` +> - *"Use the indicated representation in .obsm"*: `X_umap_harmony` +> - *"Height of the plot"*: `500` +> +> 5. {% icon galaxy-pencil %} Rename the generated image to `spectral-UMAP-harmony` +> +> 6. {% icon galaxy-eye %} Inspect the `.png` output +> +> ![UMAP plot of Batch correction with Harmony]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-spectral.png %}) +> +> > +> > +> > 1. How has *Harmony* changed the appearance of this plot? +> > 2. Are there still parts of the plot where batch effects weren't removed? +> > +> > > +> > > +> > > 1. Most of the batch effects have been successfully removed by *Harmony*. In particular, the center-left groups have now been merged into a single larger group, consisting of all batches. +> > > 2. Yes, for example, in the upper-right corner batch-specific colors are not overlapping completely. However, it is possible that some samples simply contain fewer cells and are underrepresented in clusters of rare cell types. Therefore, it cannot be said for certain whether *Harmony* has removed all batch effects.
+> > > +> > {: .solution} +> {: .question} +> +> +{: .hands_on} + +> Batch correction with Scanorama and MNC-correct +> - Other batch correction methods can also be selected with the tool {% tool [SnapATAC2 Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1) %}: *pp.mnc_correct* and *pp.scanorama_integrate* +> - For *MNC-correct* the *Number of iterations* can also be selected. +> - In order to identify the optimal algorithm, it is recommended to test the different algorithms and settings. +> - Exemplary outputs of the methods *Scanorama* and *MNC-correct* are shown here: +> ![Batch correction UMAP plots of MNC-correct with different settings and Scanorama]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/mnc-correct-scanorama-umap-spectral.png %} "UMAP plots of batch correction with different methods. (a) Batch correction with MNC-correct and default settings. (b) Batch correction with Scanorama. (c) Batch correction with MNC-correct and 30 iterations.") +> +> > +> > +> > - Compare these plots with the output of *Harmony*. Which algorithm is best-suited for the colon datasets? +> > +> > > +> > > +> > > - *Harmony* has performed the best batch integration. In that plot, the fewest single-batch groups are visible. *Scanorama* and *MNC-correct* (with the default settings) did not integrate the batches as well as *Harmony*. *MNC-correct* with 30 iterations, on the other hand, did remove a lot of batch effects and could also be used to continue the analysis. +> > > +> > {: .solution} +> {: .question} +> +{: .details} + + +# Clustering of the batch corrected samples +The analysis can now continue with the same methods shown in the [standard pathway]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %} ). The batch-corrected embeddings are now clustered and visualized with the *leiden* algorithm. + +> Leiden clustering and visualization +> +> 1.
{% tool [SnapATAC2 Clustering](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1) %} with the following parameters: +> - *"Dimension reduction and Clustering"*: `Compute a neighborhood graph of observations, using 'pp.knn'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData harmony UMAP` (output of **tl.umap** {% icon tool %}) +> - *"The key for the matrix"*: `X_spectral_harmony` +> +> > Key for the matrix +> > +> > - The batch correction algorithms have individual keys for the matrix. +> > - *Harmony* has the key `X_spectral_harmony` +> > - *MNC-correct* has the key `X_spectral_mnn` +> > - *Scanorama* has the key `X_spectral_scanorama` +> > - The keys are stored in the AnnData object under 'obsm' +> > +> {: .tip} +> +> 2. {% tool [SnapATAC2 Clustering](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1) %} with the following parameters: +> - *"Dimension reduction and Clustering"*: `Cluster cells into subgroups, using 'tl.leiden'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData harmony knn` (output of **pp.knn** {% icon tool %}) +> - *"Whether to use the Constant Potts Model (CPM) or modularity"*: `modularity` +> +> 3. {% icon galaxy-pencil %} Rename the AnnData output to `Multisample AnnData harmony leiden` +> +> 4. {% tool [SnapATAC2 Plotting](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1) %} with the following parameters: +> - *"Method used for plotting"*: `Plot the UMAP embedding, using 'pl.umap'` +> - {% icon param-file %} *"Annotated data matrix"*: `Multisample AnnData harmony leiden` (output of **tl.leiden** {% icon tool %}) +> - *"Color"*: `leiden` +> - *"Use the indicated representation in .obsm"*: `X_umap_harmony` +> - *"Height of the plot"*: `500` +> +> 5. {% icon galaxy-pencil %} Rename the generated image to `spectral-UMAP-harmony-leiden` +> +> 6.
{% icon galaxy-eye %} Inspect the `.png` output +> +> ![UMAP plot of Batch-corrected leiden clusters]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/batch-umap-harmony-leiden.png %}) +> +{: .hands_on} + +After integrating the different datasets and clustering the cells, the scATAC-seq analysis can now continue with downstream analysis. As part of the downstream analysis, the clusters can be [annotated]( {% link topics/single-cell/tutorials/scatac-standard-processing-snapatac2/tutorial.md %}#cell-cluster-annotation ) and differential peak analysis can be performed. +> +> +> The **SnapATAC2** tools for differential peak analysis are already accessible on Galaxy. However, there is no GTN tutorial available for them yet. Until such a tutorial is uploaded, you can visit the **SnapATAC2** documentation for a [tutorial on differential peak analysis](https://kzhang.org/SnapATAC2/version/2.6/tutorials/diff.html). +> +> The tools are available in Galaxy under {% tool [SnapATAC2 Peaks and Motif](toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_peaks_and_motif/snapatac2_peaks_and_motif/2.6.4+galaxy1) %}. +> +{: .comment} + + +# Conclusion +{% icon congratulations %} Well done, you’ve made it to the end! You might want to compare your results with this [control history](https://usegalaxy.eu/u/timonschlegel/h/multisample-batch-correction-with-harmony-and-snapatac2), or check out the [full workflow](https://usegalaxy.eu/u/timonschlegel/w/multisample-batch-correction-with-snapatac2-and-harmony) for this tutorial. + +In this tutorial we have integrated five {scATAC-seq} colon samples. To achieve this, we have assembled a scalable Galaxy workflow and have compared different batch integration algorithms to identify the best-suited method for our data. Finally, we have assigned the cells into clusters to prepare the data for downstream analysis.
+ +![SnapATAC2 batch correction pipeline]({% link topics/single-cell/images/scatac-batch-correction-snapatac2/Batch-correction-pipeline-overview.png %}) \ No newline at end of file diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/index.md b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/index.md new file mode 100644 index 00000000000000..e092e0ae66ddd4 --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/index.md @@ -0,0 +1,3 @@ +--- +layout: workflow-list +--- diff --git a/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/main_workflow.ga b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/main_workflow.ga new file mode 100644 index 00000000000000..0c6c3a4163a2cf --- /dev/null +++ b/topics/single-cell/tutorials/scatac-batch-correction-snapatac2/workflows/main_workflow.ga @@ -0,0 +1,1685 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This Workflow takes a dataset collection of single-cell ATAC-seq fragments and performs:\n- preprocessing\n- filtering\n- concatenation\n- dimension reduction\n- batch correction\n- leiden clustering", + "comments": [ + { + "child_steps": [ + 30, + 27, + 31 + ], + "color": "none", + "data": { + "title": "Clustering Analysis of batch-corrected Datasets" + }, + "id": 7, + "position": [ + 5166.299999999999, + 309.79999999999995 + ], + "size": [ + 892, + 559 + ], + "type": "frame" + }, + { + "child_steps": [ + 21, + 22, + 28, + 29, + 24, + 25 + ], + "color": "none", + "data": { + "title": "Batch correction with MNN-correct and Scanorama" + }, + "id": 6, + "position": [ + 4447.6, + 882.2 + ], + "size": [ + 644.7, + 870.2 + ], + "type": "frame" + }, + { + "child_steps": [ + 20, + 26, + 23 + ], + "color": "none", + "data": { + "title": "Batch correction with Harmony" + }, + "id": 5, + "position": [ + 4442.599999999999, + 306.9 + ], + "size": [ + 692.7, + 554.2 + ], + "type": "frame" + }, + 
{ + "child_steps": [ + 18, + 19 + ], + "color": "none", + "data": { + "title": "No Batch Correction Control" + }, + "id": 4, + "position": [ + 4102.9, + 0 + ], + "size": [ + 569, + 249.1 + ], + "type": "frame" + }, + { + "child_steps": [ + 16, + 17 + ], + "color": "none", + "data": { + "title": "Dimension Reduction" + }, + "id": 3, + "position": [ + 3533.4, + 319.69999999999993 + ], + "size": [ + 691.1, + 572.3 + ], + "type": "frame" + }, + { + "child_steps": [ + 12, + 14, + 11, + 13, + 15 + ], + "color": "none", + "data": { + "title": "Concatenate Collection" + }, + "id": 2, + "position": [ + 2216.3, + 318.59999999999997 + ], + "size": [ + 1266.8, + 575.6 + ], + "type": "frame" + }, + { + "child_steps": [ + 5, + 4, + 3, + 6, + 7, + 8, + 9, + 10 + ], + "color": "none", + "data": { + "title": "Preprocessing and Filtering" + }, + "id": 1, + "position": [ + 406.5, + 309.9 + ], + "size": [ + 1738, + 604 + ], + "type": "frame" + }, + { + "child_steps": [ + 0, + 1, + 2 + ], + "color": "none", + "data": { + "title": "Inputs" + }, + "id": 0, + "position": [ + 0, + 380.19999999999993 + ], + "size": [ + 332, + 457 + ], + "type": "frame" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "0009-0001-3228-105X", + "name": "Timon Schlegel" + } + ], + "format-version": "0.1", + "license": "CC-BY-4.0", + "name": "Multisample Batch Correction with SnapATAC2 and Harmony", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input MultiSample Collection" + } + ], + "label": "Input MultiSample Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 53.13798070965498, + "top": 
460.6479116979184 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "f9adf5a3-0e3b-4eda-98c8-18e45045b74f", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "chrom_sizes" + } + ], + "label": "chrom_sizes", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 53.50487956803778, + "top": 561.5661647555138 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "590b9d33-dab0-473c-973c-6583fc156e08", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "gencode.v46.annotation.gtf.gz" + } + ], + "label": "gencode.v46.annotation.gtf.gz", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 54.2630020709945, + "top": 677.5220223550216 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "efa670a3-9ac8-48b3-9a5c-c619465a048a", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 3, + "input_connections": { + "method|chrom_sizes": { + "id": 1, + "output_name": "output" + }, + "method|fragment_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + }, + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.import_data", + 
"name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 436.11453455329143, + "top": 385.33835222289235 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.import_data\", \"__current_case__\": 1, \"fragment_file\": {\"__class__\": \"ConnectedValue\"}, \"chrom_sizes\": {\"__class__\": \"ConnectedValue\"}, \"min_num_fragments\": \"1000\", \"sorted_by_barcode\": false, \"whitelist\": null, \"chrM\": \"chrM, M\", \"shift_left\": \"0\", \"shift_right\": \"0\", \"chunk_size\": \"2000\"}, \"method|fragment_file|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "f9ca7736-2f00-4ae6-9de1-a6163423bd96", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 4, + "input_connections": { + "method|adata": { + "id": 3, + "output_name": "anndata_out" + }, + "method|gene_anno": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + }, + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "metrics.tsse", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + 
"position": { + "left": 696.1660511549993, + "top": 604.0814144015509 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"metrics.tsse\", \"__current_case__\": 12, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"gene_anno\": {\"__class__\": \"ConnectedValue\"}}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "2719b1b0-9fb8-455d-948b-27539f6a8469", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 5, + "input_connections": { + "method|adata": { + "id": 4, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "pl.tsse", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": "out_png", + "type": "png" + } + ], + "position": { + "left": 828.6103620550969, + "top": 371.10146465590987 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.tsse\", \"__current_case__\": 1, \"adata\": 
{\"__class__\": \"ConnectedValue\"}, \"min_fragment\": \"500\", \"width\": \"600\", \"height\": \"400\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "da811804-866d-4e7f-908a-d9b8d67f4228", + "when": null, + "workflow_outputs": [ + { + "label": "tsse-plots", + "output_name": "out_png", + "uuid": "a6e0e735-c13a-4560-9f99-d61f2d06a173" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 6, + "input_connections": { + "method|adata": { + "id": 4, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.filter_cells", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 917.4894272390953, + "top": 607.1630652872784 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.filter_cells\", \"__current_case__\": 4, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"min_counts\": \"1000\", \"min_tsse\": \"7.0\", \"max_counts\": null, \"max_tsse\": null}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "b89bb406-0957-474d-b00b-d0b9abfbf3df", + "when": null, + 
"workflow_outputs": [] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 7, + "input_connections": { + "method|adata": { + "id": 6, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.add_tile_matrix", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 1152.8205158897117, + "top": 612.4020835418164 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.add_tile_matrix\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"bin_size\": \"5000\", \"chunk_size\": \"500\", \"exclude_chroms\": \"chrM, chrY, M, Y\", \"min_frag_size\": null, \"max_frag_size\": null, \"counting_strategy\": \"insertion\"}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "a8c7be0d-9668-4d93-8a38-57ebc97165e5", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 8, + "input_connections": { + "method|adata": { + "id": 7, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime 
parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.select_features", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 1400.4360347803472, + "top": 606.8857625864699 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.select_features\", \"__current_case__\": 5, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_features\": \"50000\", \"filter_lower_quantile\": \"0.005\", \"filter_upper_quantile\": \"0.005\", \"whitelist\": null, \"blacklist\": null, \"max_iter\": \"1\"}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "9fba171c-d341-4ec7-9a36-90dcef502db1", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 9, + "input_connections": { + "method|adata": { + "id": 8, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.scrublet", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 1650.6617105595355, + "top": 605.9834480076696 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.scrublet\", \"__current_case__\": 6, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"features\": \"\", \"n_comps\": \"15\", \"sim_doublet_ratio\": \"2.0\", \"expected_doublet_rate\": \"0.1\", \"n_neighbors\": null, \"use_approx_neighbors\": false, \"random_state\": \"0\"}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "1790492b-9897-43d7-b728-f1588891fcb6", + "when": null, + "workflow_outputs": [] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 10, + "input_connections": { + "method|adata": { + "id": 9, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.filter_doublets", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 1900.8976208641116, + "top": 604.2005231021845 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": 
{\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.filter_doublets\", \"__current_case__\": 7, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"probability_threshold\": \"0.5\", \"score_threshold\": null}, \"method|adata|__identifier__\": \"colon_transverse_SM-CSSDA_rep1_fragments.bed.gz\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "d6b45b80-6e5f-4156-b032-724fa4a623e1", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "errors": null, + "id": 11, + "input_connections": { + "input_collection": { + "id": 10, + "output_name": "anndata_out" + } + }, + "inputs": [], + "label": null, + "name": "Extract element identifiers", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 2303.319335568975, + "top": 696.0987087330783 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "tool_shed_repository": { + "changeset_revision": "d3c07d270a50", + "name": "collection_element_identifiers", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.2", + "type": "tool", + "uuid": "20135638-f5a5-4ba7-bcbe-f6823aca6f22", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "", + "content_id": "__EXTRACT_DATASET__", + "errors": null, + "id": 12, + "input_connections": { + "input": { + "id": 10, + "output_name": "anndata_out" + } + }, + "inputs": [], + "label": "Extract first dataset", + "name": "Extract dataset", + "outputs": [ + { + "name": "output", + "type": "data" + } + ], + 
"position": { + "left": 2793.6873418961145, + "top": 378.35000496384527 + }, + "post_job_actions": { + "ChangeDatatypeActionoutput": { + "action_arguments": { + "newtype": "h5ad" + }, + "action_type": "ChangeDatatypeAction", + "output_name": "output" + } + }, + "tool_id": "__EXTRACT_DATASET__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"which\": {\"which_dataset\": \"first\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.1", + "type": "tool", + "uuid": "de5d5032-5ba3-430c-a4b6-cf182c49cf6b", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "", + "content_id": "Show beginning1", + "errors": null, + "id": 13, + "input_connections": { + "input": { + "id": 11, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Select first", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 2547.654216544315, + "top": 696.0859572999291 + }, + "post_job_actions": {}, + "tool_id": "Show beginning1", + "tool_state": "{\"header\": false, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineNum\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "973a6db9-5ea6-4135-861e-39aac0abcc1e", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "", + "content_id": "__FILTER_FROM_FILE__", + "errors": null, + "id": 14, + "input_connections": { + "how|filter_source": { + "id": 13, + "output_name": "out_file1" + }, + "input": { + "id": 10, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Filter collection", + "name": "how" + } + ], + "label": null, + "name": "Filter collection", + "outputs": [ + { + "name": "output_filtered", + "type": "input" + }, + { + "name": "output_discarded", + "type": "input" + } + ], + "position": { + "left": 2788.2500213205853, + "top": 519.6562398530369 + }, 
+ "post_job_actions": { + "HideDatasetActionoutput_discarded": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_discarded" + } + }, + "tool_id": "__FILTER_FROM_FILE__", + "tool_state": "{\"how\": {\"how_filter\": \"remove_if_present\", \"__current_case__\": 1, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "138014fc-3f40-4418-a009-8c8421653d35", + "when": null, + "workflow_outputs": [ + { + "label": "collection_without_first_entry", + "output_name": "output_filtered", + "uuid": "dae74fe0-46fa-480a-832e-0d1c03e543c1" + } + ] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/anndata_manipulate/anndata_manipulate/0.10.3+galaxy0", + "errors": null, + "id": 15, + "input_connections": { + "input": { + "id": 12, + "output_name": "output" + }, + "manipulate|other_adatas": { + "id": 14, + "output_name": "output_filtered" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Manipulate AnnData", + "name": "manipulate" + } + ], + "label": "Concatenate AnnDatas", + "name": "Manipulate AnnData", + "outputs": [ + { + "name": "anndata", + "type": "h5ad" + } + ], + "position": { + "left": 3180.11251776899, + "top": 440.5810110565307 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/anndata_manipulate/anndata_manipulate/0.10.3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ed4996a16f7f", + "name": "anndata_manipulate", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"manipulate\": {\"function\": \"concatenate\", \"__current_case__\": 0, \"other_adatas\": {\"__class__\": \"ConnectedValue\"}, \"join\": \"inner\", \"batch_key\": \"batch\", \"index_unique\": \"-\"}, \"__page__\": null, 
\"__rerun_remap_job_id__\": null}", + "tool_version": "0.10.3+galaxy0", + "type": "tool", + "uuid": "3cc04b3f-4640-470a-81de-cba3fdc0efe8", + "when": null, + "workflow_outputs": [] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 16, + "input_connections": { + "method|adata": { + "id": 15, + "output_name": "anndata" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + }, + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + }, + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.select_features on all", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 3598.1342933005426, + "top": 387.77659173995113 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pp.select_features\", \"__current_case__\": 5, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_features\": \"50000\", \"filter_lower_quantile\": \"0.005\", \"filter_upper_quantile\": \"0.005\", \"whitelist\": {\"__class__\": \"RuntimeValue\"}, \"blacklist\": {\"__class__\": \"RuntimeValue\"}, \"max_iter\": \"1\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "0a2510d5-ef7c-4fc7-99ce-82da9dc138e2", + "when": null, + "workflow_outputs": [] + }, + "17": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 17, + "input_connections": { + "method|adata": { + "id": 16, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + }, + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.spectral", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 3920.9677429939366, + "top": 418.9262516496159 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"tl.spectral\", \"__current_case__\": 0, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_comps\": \"30\", \"features\": {\"__class__\": \"RuntimeValue\"}, \"random_state\": \"0\", \"sample_size\": null, \"chunk_size\": \"20000\", \"distance_metric\": \"cosine\", \"weighted_by_sd\": true}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "36a4d100-9fa5-42ec-bf38-66a92e946024", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 18, + "input_connections": { + "method|adata": { + "id": 17, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.umap_no-batch-correction", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + 
"name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4145.415189179224, + "top": 69.03750305188755 + }, + "post_job_actions": { + "TagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:no-batch-correction" + }, + "action_type": "TagDatasetAction", + "output_name": "anndata_out" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"tl.umap\", \"__current_case__\": 1, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_comps\": \"2\", \"use_dims\": null, \"use_rep\": \"X_spectral\", \"key_added\": \"umap\", \"random_state\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "489a3462-6d93-4867-b6f7-939a877dc9c4", + "when": null, + "workflow_outputs": [] + }, + "19": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 19, + "input_connections": { + "method|adata": { + "id": 18, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "pl.umap_no-batch-correction", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": "out_png", + "type": "png" + } + ], + "position": { + "left": 4399.117656747549, + "top": 70.45336867614213 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"color\": \"batch\", \"use_rep\": \"X_umap\", \"marker_size\": null, \"marker_opacity\": \"1.0\", \"sample_size\": null, \"width\": \"600\", \"height\": \"500\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "3b5506d2-8d1a-45bb-8ff6-098a8b783c06", + "when": null, + "workflow_outputs": [ + { + "label": "out_png", + "output_name": "out_png", + "uuid": "c66faaf2-6d6b-4410-9eb1-13a25c101c93" + } + ] + }, + "20": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 20, + "input_connections": { + "method|adata": { + "id": 18, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.harmony", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4516.035948467729, + "top": 429.0184899345538 + }, + "post_job_actions": { + "RemoveTagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:no-batch-correction" + }, + "action_type": "RemoveTagDatasetAction", + "output_name": "anndata_out" + }, + "TagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:harmony" + }, + "action_type": "TagDatasetAction", + "output_name": "anndata_out" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": 
\"pp.harmony\", \"__current_case__\": 9, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"batch\": \"batch\", \"use_rep\": \"X_spectral\", \"use_dims\": \"\", \"groupby\": \"\", \"key_added\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "989ad3c3-6d77-4298-b718-01b1a8f2edc7", + "when": null, + "workflow_outputs": [] + }, + "21": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 21, + "input_connections": { + "method|adata": { + "id": 18, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.mnn_correct", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4506.593778021323, + "top": 1078.1490864698046 + }, + "post_job_actions": { + "RemoveTagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:no-batch-correction" + }, + "action_type": "RemoveTagDatasetAction", + "output_name": "anndata_out" + }, + "TagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:mnn-correct" + }, + "action_type": "TagDatasetAction", + "output_name": "anndata_out" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pp.mnc_correct\", \"__current_case__\": 8, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"batch\": \"batch\", \"n_neighbors\": \"5\", \"n_clusters\": \"40\", \"n_iter\": \"1\", \"use_rep\": \"X_spectral\", \"use_dims\": \"\", \"groupby\": 
\"\", \"key_added\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "db3331b4-9585-41e8-bdf4-0d26590a71eb", + "when": null, + "workflow_outputs": [] + }, + "22": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "errors": null, + "id": 22, + "input_connections": { + "method|adata": { + "id": 18, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Preprocessing", + "name": "method" + } + ], + "label": "pp.scanorama", + "name": "SnapATAC2 Preprocessing", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4459.69490054207, + "top": 1376.2766706368323 + }, + "post_job_actions": { + "RemoveTagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:no-batch-correction" + }, + "action_type": "RemoveTagDatasetAction", + "output_name": "anndata_out" + }, + "TagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:scanorama" + }, + "action_type": "TagDatasetAction", + "output_name": "anndata_out" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_preprocessing/snapatac2_preprocessing/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fef8f61cb34c", + "name": "snapatac2_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pp.scanorama_integrate\", \"__current_case__\": 10, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"batch\": \"batch\", \"n_neighbors\": \"20\", \"use_rep\": \"X_spectral\", \"use_dims\": \"\", \"groupby\": \"\", \"key_added\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "84d2703d-f036-47ec-852d-017c28213ea6", + "when": null, + 
"workflow_outputs": [] + }, + "23": { + "annotation": "Recalculates UMAP-embeddings and adds it under the key 'X_umap_harmony'", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 23, + "input_connections": { + "method|adata": { + "id": 20, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.umap_batch-correction", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4820.5476172268345, + "top": 354.483884309917 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"tl.umap\", \"__current_case__\": 1, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_comps\": \"2\", \"use_dims\": null, \"use_rep\": \"X_spectral_harmony\", \"key_added\": \"umap_harmony\", \"random_state\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "6b3fdb77-1c55-42c8-9a54-66555aeb68ad", + "when": null, + "workflow_outputs": [] + }, + "24": { + "annotation": "Recalculates UMAP-embeddings and adds it under the key 'X_umap_harmony'", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 24, + "input_connections": { + "method|adata": { + "id": 21, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.umap_mnn-correct", + 
"name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4789.584587750825, + "top": 958.5838086365831 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"tl.umap\", \"__current_case__\": 1, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_comps\": \"2\", \"use_dims\": null, \"use_rep\": \"X_spectral_mnn\", \"key_added\": \"umap_mnn\", \"random_state\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "2b3cf578-196b-4c7f-94b0-e636ae32a579", + "when": null, + "workflow_outputs": [] + }, + "25": { + "annotation": "Recalculates UMAP-embeddings and adds it under the key 'X_umap_harmony'", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 25, + "input_connections": { + "method|adata": { + "id": 22, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.umap_scanorama", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 4771.4381925037305, + "top": 1373.2418341851983 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": 
false}, \"method\": {\"method\": \"tl.umap\", \"__current_case__\": 1, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_comps\": \"2\", \"use_dims\": null, \"use_rep\": \"X_spectral_scanorama\", \"key_added\": \"umap_scanorama\", \"random_state\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "ad8584af-f4ce-4296-b9c2-828efba2dea1", + "when": null, + "workflow_outputs": [] + }, + "26": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 26, + "input_connections": { + "method|adata": { + "id": 23, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "UMAP-plot of X_spectral_harmony", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": "out_png", + "type": "png" + } + ], + "position": { + "left": 4844.917381714436, + "top": 601.5156231877603 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"color\": \"batch\", \"use_rep\": \"X_umap_harmony\", \"marker_size\": null, \"marker_opacity\": \"1.0\", \"sample_size\": null, \"width\": \"600\", \"height\": \"500\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "39b8cb8f-84b6-4130-bf6c-e524aa859b76", + "when": null, + "workflow_outputs": [ + { + "label": "x_spectral_harmony", + "output_name": "out_png", + "uuid": 
"71f8f33f-0017-467e-bf8c-ccd0b213b731" + } + ] + }, + "27": { + "annotation": "Recalculates UMAP-embeddings and adds it under the key 'X_umap_harmony'", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 27, + "input_connections": { + "method|adata": { + "id": 23, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "pp.knn", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 5212.859511125881, + "top": 388.8495120333433 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pp.knn\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"n_neighbors\": \"50\", \"use_dims\": \"\", \"use_rep\": \"X_spectral_harmony\", \"algorithm\": \"kdtree\", \"random_state\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "e3151ab9-d897-4979-b57b-bca7c1fc8f10", + "when": null, + "workflow_outputs": [] + }, + "28": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 28, + "input_connections": { + "method|adata": { + "id": 24, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "UMAP-plot of X_spectral_mnn-correct", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": 
"out_png", + "type": "png" + } + ], + "position": { + "left": 4801.687357021619, + "top": 1122.0156213521332 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"color\": \"batch\", \"use_rep\": \"X_umap_mnn\", \"marker_size\": null, \"marker_opacity\": \"1.0\", \"sample_size\": null, \"width\": \"600\", \"height\": \"500\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "5fd74ea5-2f06-419a-9bd2-5dc1a7deacf0", + "when": null, + "workflow_outputs": [ + { + "label": "x_spectral_mnn-correct", + "output_name": "out_png", + "uuid": "2fa7f3dc-fed8-4a34-b857-4cfd276e7625" + } + ] + }, + "29": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 29, + "input_connections": { + "method|adata": { + "id": 25, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "UMAP plot of X_spectral_scanorama", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": "out_png", + "type": "png" + } + ], + "position": { + "left": 4783.526552836202, + "top": 1563.9154997337987 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"color\": \"batch\", \"use_rep\": \"X_umap_scanorama\", \"marker_size\": null, \"marker_opacity\": \"1.0\", \"sample_size\": null, \"width\": \"600\", \"height\": \"500\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "d7b7ac71-f429-414e-b629-b5d9eead0270", + "when": null, + "workflow_outputs": [ + { + "label": "x_spectral_scanorma", + "output_name": "out_png", + "uuid": "7a6c6165-42b6-4fff-b0d5-35f94541cd79" + } + ] + }, + "30": { + "annotation": "Recalculates UMAP-embeddings and adds it under the key 'X_umap_harmony'", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "errors": null, + "id": 30, + "input_connections": { + "method|adata": { + "id": 27, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Clustering", + "name": "method" + } + ], + "label": "tl.leiden", + "name": "SnapATAC2 Clustering", + "outputs": [ + { + "name": "anndata_out", + "type": "h5ad" + } + ], + "position": { + "left": 5500, + "top": 360 + }, + "post_job_actions": { + "TagDatasetActionanndata_out": { + "action_arguments": { + "tags": "name:leiden" + }, + "action_type": "TagDatasetAction", + "output_name": "anndata_out" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_clustering/snapatac2_clustering/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "afb90d3485df", + "name": "snapatac2_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"tl.leiden\", \"__current_case__\": 3, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"resolution\": \"1.0\", \"objective_function\": 
\"modularity\", \"min_cluster_size\": \"5\", \"n_iterations\": \"-1\", \"random_state\": \"0\", \"key_added\": \"leiden\", \"weighted\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "653eca3d-ed54-460c-927d-167c4aaabd2e", + "when": null, + "workflow_outputs": [ + { + "label": "anndata_harmony_leiden", + "output_name": "anndata_out", + "uuid": "532c487d-bfdb-4552-bed2-17ca6063aa01" + } + ] + }, + "31": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "errors": null, + "id": 31, + "input_connections": { + "method|adata": { + "id": 30, + "output_name": "anndata_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnapATAC2 Plotting", + "name": "method" + } + ], + "label": "UMAP-plot of Leiden Clusters", + "name": "SnapATAC2 Plotting", + "outputs": [ + { + "name": "out_png", + "type": "png" + } + ], + "position": { + "left": 5841.717777152833, + "top": 449.2168615373608 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snapatac2_plotting/snapatac2_plotting/2.6.4+galaxy1", + "tool_shed_repository": { + "changeset_revision": "370d45e0d1a8", + "name": "snapatac2_plotting", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced_common\": {\"show_log\": false}, \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 2, \"adata\": {\"__class__\": \"ConnectedValue\"}, \"color\": \"leiden\", \"use_rep\": \"X_umap_harmony\", \"marker_size\": null, \"marker_opacity\": \"1.0\", \"sample_size\": null, \"width\": \"600\", \"height\": \"500\", \"out_file\": \"png\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.6.4+galaxy1", + "type": "tool", + "uuid": "34814927-feaf-4890-b6f5-e9c68f7ad9f2", + "when": null, + "workflow_outputs": [] + } + }, + "tags": [ + "epigenetics", + "scATAC-seq", + 
"name:single-cell" + ], + "uuid": "f7e4ef27-75ed-46b8-a3fe-3de91d2ff948", + "version": 30 +} \ No newline at end of file