diff --git a/workflows/scRNAseq/velocyto/.dockstore.yml b/workflows/scRNAseq/velocyto/.dockstore.yml new file mode 100644 index 000000000..3f1eaeb9b --- /dev/null +++ b/workflows/scRNAseq/velocyto/.dockstore.yml @@ -0,0 +1,20 @@ +version: 1.2 +workflows: +- name: Velocyto-on10X-from-bundled + subclass: Galaxy + publish: true + primaryDescriptorPath: /Velocyto-on10X-from-bundled.ga + testParameterFiles: + - /Velocyto-on10X-from-bundled-tests.yml + authors: + - name: Lucille Delisle + orcid: 0000-0002-1964-4960 +- name: Velocyto-on10X-filtered-barcodes + subclass: Galaxy + publish: true + primaryDescriptorPath: /Velocyto-on10X-filtered-barcodes.ga + testParameterFiles: + - /Velocyto-on10X-filtered-barcodes-tests.yml + authors: + - name: Lucille Delisle + orcid: 0000-0002-1964-4960 diff --git a/workflows/scRNAseq/velocyto/CHANGELOG.md b/workflows/scRNAseq/velocyto/CHANGELOG.md new file mode 100644 index 000000000..86e3b2e3d --- /dev/null +++ b/workflows/scRNAseq/velocyto/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] 2024-01-26 + +First release. diff --git a/workflows/scRNAseq/velocyto/README.md b/workflows/scRNAseq/velocyto/README.md new file mode 100644 index 000000000..8566b850f --- /dev/null +++ b/workflows/scRNAseq/velocyto/README.md @@ -0,0 +1,15 @@ +# Velocyto on 10X data + +These workflows simply run velocyto. There are 2 workflows because one can be easily run after the 'fastq-to-matrix-10x' workflows (Velocyto-on10X-from-bundled). The other can be easily run from uploaded datasets (Velocyto-on10X-filtered-barcodes). + +## Input datasets + +- BAM files with CB and UB: A collection of BAM files. It accepts BAM from cellranger or STARsolo with the CB and UB tags (if you use the fastq-to-matrix-10x workflows these tags are automatically included). +- filtered barcodes (only for Velocyto-on10X-filtered-barcodes workflow): A collection of filtered barcodes (this is what will be used by velocyto). 
'Filtered' means that these barcodes have been identified as potential cells. It should not be the whole list of 3 million possible barcodes from cellranger. +- filtered matrices in bundle (only for Velocyto-on10X-from-bundled workflow): A collection of filtered matrices as bundled (like the one which comes from the fastq-to-matrix-10x workflows): A collection with as many items as samples. For each sample, the item is a list with 3 datasets (barcodes, genes, matrix). The workflow will then extract the items which have the 'barcodes' identifier. +- gtf file: A file with annotations describing where exons are and how they are grouped into genes. + +## Processing + +- If you provided matrices, the first step is to extract barcodes. +- In both cases, the velocyto CLI is run to get a loom file per sample with spliced and unspliced counts. diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes-tests.yml b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes-tests.yml new file mode 100644 index 000000000..54d9a2afa --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes-tests.yml @@ -0,0 +1,31 @@ +- doc: Test outline for Velocyto-on10X (barcodes) + job: + gtf file: + class: File + location: https://zenodo.org/record/6457007/files/Drosophila_melanogaster.BDGP6.32.109_UCSC.gtf.gz + decompress: true + filetype: gtf + BAM files with CB and UB: + class: Collection + collection_type: list + elements: + - class: File + identifier: subsample + location: https://zenodo.org/records/10572348/files/subsample.bam + filetype: bam + filtered barcodes: + class: Collection + collection_type: list + elements: + - class: File + identifier: subsample + location: https://zenodo.org/records/10572348/files/barcodes.tsv + filetype: tsv + outputs: + velocyto loom: + element_tests: + subsample: + asserts: + has_size: + value: 4639326 + delta: 400000 diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes.ga 
b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes.ga new file mode 100644 index 000000000..751f354f1 --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes.ga @@ -0,0 +1,178 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Run velocyto to get loom with counts of spliced and unspliced", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Velocyto-on10X-filtered-barcodes", + "steps": { + "0": { + "annotation": "This can be output of CellRanger or STARsolo", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of CellRanger or STARsolo", + "name": "BAM files with CB and UB" + } + ], + "label": "BAM files with CB and UB", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"bam\"], \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "043d198b-3af0-477d-be11-5d1373280379", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "This can be output of STARsolo or DropletUtils (too many barcodes will make memory errors)", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of STARsolo or DropletUtils (too many barcodes will make memory errors)", + "name": "filtered barcodes" + } + ], + "label": "filtered barcodes", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 55.999999999999986, + "top": 108.33333333333333 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"tsv\"], \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + 
"type": "data_collection_input", + "uuid": "03b0d22c-3b46-4460-8c18-b524bc09d655", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "gtf file", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "gtf file", + "name": "gtf file" + } + ], + "label": "gtf file", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 148, + "top": 204.66666666666666 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"gtf\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "f1d04259-f449-44d7-8d36-49f3ad664361", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/velocyto_cli/velocyto_cli/0.17.17+galaxy1", + "errors": null, + "id": 3, + "input_connections": { + "main|BAM": { + "id": 0, + "output_name": "output" + }, + "main|barcodes": { + "id": 1, + "output_name": "output" + }, + "main|gtffile": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + } + ], + "label": "velocyto", + "name": "velocyto CLI", + "outputs": [ + { + "name": "samples", + "type": "loom" + } + ], + "position": { + "left": 470.66666666666663, + "top": 116.32222493489576 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/velocyto_cli/velocyto_cli/0.17.17+galaxy1", + "tool_shed_repository": { + "changeset_revision": "883c33ef3372", + "name": "velocyto_cli", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"main\": {\"do\": \"run10x\", \"__current_case__\": 0, \"sample_definition\": {\"sample_definition_select\": \"identifier\", \"__current_case__\": 1}, \"BAM\": {\"__class__\": \"ConnectedValue\"}, \"barcodes\": {\"__class__\": \"ConnectedValue\"}, \"gtffile\": {\"__class__\": \"ConnectedValue\"}, \"s\": {\"__class__\": \"RuntimeValue\"}, \"m\": {\"__class__\": \"RuntimeValue\"}, \"M\": false, \"t\": \"uint16\"}, \"verbosity\": \"-vv\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.17.17+galaxy1", + "type": "tool", + "uuid": "f9aef50f-b31d-44dd-aad6-fe4d520e3b1e", + "when": null, + "workflow_outputs": [ + { + "label": "velocyto loom", + "output_name": "samples", + "uuid": "2899a85c-c198-436f-a409-9bfbdf90c95f" + } + ] + } + }, + "tags": [ + "name:single-cell" + ], + "uuid": "33862923-af05-48ba-aec5-14393981cee2", + "version": 4 +} \ No newline at end of file diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled-tests.yml b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled-tests.yml new file mode 100644 index 000000000..261b132bd --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled-tests.yml @@ -0,0 +1,43 @@ +- doc: Test outline for Velocyto-on10X (bundled) + job: + gtf file: + class: File + location: https://zenodo.org/record/6457007/files/Drosophila_melanogaster.BDGP6.32.109_UCSC.gtf.gz + decompress: true + filetype: gtf + BAM files with CB and UB: + class: Collection + collection_type: list + elements: + - class: File + identifier: subsample + location: https://zenodo.org/records/10572348/files/subsample.bam + filetype: bam + filtered matrices in bundle: + class: Collection + collection_type: list:list + elements: + - class: Collection + type: list + identifier: subsample + elements: + - class: File + identifier: barcodes + location: https://zenodo.org/records/10572348/files/barcodes.tsv + filetype: tsv + - class: File + identifier: genes + location: 
https://zenodo.org/records/10572348/files/genes.tsv + filetype: tsv + - class: File + identifier: matrix + location: https://zenodo.org/records/10572348/files/matrix.mtx + filetype: mtx + outputs: + velocyto loom: + element_tests: + subsample: + asserts: + has_size: + value: 4639326 + delta: 400000 diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled.ga b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled.ga new file mode 100644 index 000000000..99d09c61c --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled.ga @@ -0,0 +1,362 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Run velocyto to get loom with counts of spliced and unspliced. It will extract the 'barcodes' from the bundled outputs.", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Velocyto-on10X-from-bundled", + "steps": { + "0": { + "annotation": "This can be output of CellRanger or STARsolo", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of CellRanger or STARsolo", + "name": "BAM files with CB and UB" + } + ], + "label": "BAM files with CB and UB", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"bam\"], \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "043d198b-3af0-477d-be11-5d1373280379", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "This can be output of STARsolo or DropletUtils", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of STARsolo or DropletUtils", + "name": "filtered matrices in 
bundle" + } + ], + "label": "filtered matrices in bundle", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 141, + "top": 139 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "03b0d22c-3b46-4460-8c18-b524bc09d655", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "gtf file", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "gtf file", + "name": "gtf file" + } + ], + "label": "gtf file", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 231, + "top": 273 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"gtf\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "f1d04259-f449-44d7-8d36-49f3ad664361", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "__APPLY_RULES__", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Apply rules", + "name": "input" + } + ], + "label": "extract barcodes from bundle", + "name": "Apply rules", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 421, + "top": 133 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "__APPLY_RULES__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"rules\": {\"mapping\": [{\"columns\": [0], \"editing\": false, \"type\": \"list_identifiers\"}], \"rules\": [{\"error\": null, \"type\": \"add_column_metadata\", \"value\": \"identifier0\", \"warn\": null}, {\"error\": null, \"type\": \"add_column_metadata\", \"value\": \"identifier1\", \"warn\": null}, 
{\"error\": null, \"invert\": false, \"target_column\": 1, \"type\": \"add_filter_matches\", \"value\": \"barcodes\", \"warn\": null}]}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.0", + "type": "tool", + "uuid": "8c2d032f-596c-4315-8361-ebf1ec2eb655", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "BAM files with CB and UB": { + "id": 0, + "input_subworkflow_step_id": 0, + "output_name": "output" + }, + "filtered barcodes": { + "id": 3, + "input_subworkflow_step_id": 1, + "output_name": "output" + }, + "gtf file": { + "id": 2, + "input_subworkflow_step_id": 2, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Velocyto_on10X_filtered_barcodes", + "outputs": [], + "position": { + "left": 710, + "top": 63.5 + }, + "subworkflow": { + "a_galaxy_workflow": "true", + "annotation": "", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "Velocyto_on10X_filtered_barcodes", + "steps": { + "0": { + "annotation": "This can be output of CellRanger or STARsolo", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of CellRanger or STARsolo", + "name": "BAM files with CB and UB" + } + ], + "label": "BAM files with CB and UB", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0.0, + "top": 0.0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"bam\"], \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "043d198b-3af0-477d-be11-5d1373280379", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "This can be output of STARsolo or DropletUtils", + "content_id": null, + "errors": null, 
+ "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "This can be output of STARsolo or DropletUtils", + "name": "filtered barcodes" + } + ], + "label": "filtered barcodes", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 55.999999999999986, + "top": 108.33333333333333 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"tsv\"], \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "03b0d22c-3b46-4460-8c18-b524bc09d655", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "gtf file", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "gtf file", + "name": "gtf file" + } + ], + "label": "gtf file", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 148.0, + "top": 204.66666666666666 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"gtf\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "f1d04259-f449-44d7-8d36-49f3ad664361", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/velocyto_cli/velocyto_cli/0.17.17+galaxy1", + "errors": null, + "id": 3, + "input_connections": { + "main|BAM": { + "id": 0, + "output_name": "output" + }, + "main|barcodes": { + "id": 1, + "output_name": "output" + }, + "main|gtffile": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + }, + { + "description": "runtime parameter for tool velocyto CLI", + "name": "main" + } + ], + 
"label": "velocyto", + "name": "velocyto CLI", + "outputs": [ + { + "name": "samples", + "type": "loom" + } + ], + "position": { + "left": 470.66666666666663, + "top": 116.32222493489576 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/velocyto_cli/velocyto_cli/0.17.17+galaxy1", + "tool_shed_repository": { + "changeset_revision": "883c33ef3372", + "name": "velocyto_cli", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"main\": {\"do\": \"run10x\", \"__current_case__\": 0, \"sample_definition\": {\"sample_definition_select\": \"identifier\", \"__current_case__\": 1}, \"BAM\": {\"__class__\": \"ConnectedValue\"}, \"barcodes\": {\"__class__\": \"ConnectedValue\"}, \"gtffile\": {\"__class__\": \"ConnectedValue\"}, \"s\": {\"__class__\": \"RuntimeValue\"}, \"m\": {\"__class__\": \"RuntimeValue\"}, \"M\": false, \"t\": \"uint16\"}, \"verbosity\": \"-vv\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.17.17+galaxy1", + "type": "tool", + "uuid": "f9aef50f-b31d-44dd-aad6-fe4d520e3b1e", + "when": null, + "workflow_outputs": [ + { + "label": "velocyto loom", + "output_name": "samples", + "uuid": "2899a85c-c198-436f-a409-9bfbdf90c95f" + } + ] + } + }, + "tags": "", + "uuid": "f9b4ecd6-6d94-4a8c-b4ed-c43fd835f8b1" + }, + "tool_id": null, + "type": "subworkflow", + "uuid": "39ea418d-135d-4e7d-b2fe-fbde5c7393b7", + "when": null, + "workflow_outputs": [ + { + "label": "velocyto loom", + "output_name": "velocyto loom", + "uuid": "865e6dd6-8c01-4188-a1f6-4cd77e433eed" + } + ] + } + }, + "tags": [ + "name:single-cell" + ], + "uuid": "0608c7f4-767c-47b3-ae45-4e98c7c83bec", + "version": 8 +} \ No newline at end of file