Skip to content

Commit

Permalink
Merge pull request #199 from AnnaSyme/assembly-wfs
Browse files Browse the repository at this point in the history
Added flye assembly workflow
  • Loading branch information
mvdbeek authored Feb 14, 2024
2 parents f5a558f + b6cefa4 commit 5d9e8ee
Show file tree
Hide file tree
Showing 6 changed files with 366 additions and 0 deletions.
11 changes: 11 additions & 0 deletions workflows/genome-assembly/assembly-with-flye/.dockstore.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: 1.2
workflows:
- name: main
subclass: Galaxy
publish: true
primaryDescriptorPath: /Genome-assembly-with-Flye.ga
testParameterFiles:
- /Genome-assembly-with-Flye-tests.yml
authors:
- name: Anna Syme
orcid: 0000-0002-9906-0673
9 changes: 9 additions & 0 deletions workflows/genome-assembly/assembly-with-flye/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1] 2023-07-14
First release.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
- doc: Test outline for Genome-assembly-with-Flye
job:
Input sequence reads:
class: File
path: test-data/Input_reads.fastqsanger.gz
filetype: fastqsanger.gz
outputs:
Flye assembly (consensus):
asserts:
has_text:
text: '>contig_1'
has_size:
min: 80k
max: 86k
Flye assembly (assembly_graph):
asserts:
has_text:
text: 'digraph {'
has_n_lines:
min: 20
max: 24
Flye assembly (Graphical Fragment Assembly):
asserts:
has_text:
text: edge_1
has_n_lines:
min: 6
max: 8
Flye assembly (assembly_info):
asserts:
has_text:
text: seq_name
'Quast: HTML report':
asserts:
has_size:
min: 250k
max: 400k
Flye assembly statistics:
asserts:
has_text:
text: "Scaffold L50"
'Bandage Image: Assembly Graph Image':
asserts:
has_size:
min: 30k
max: 50k
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
{
"a_galaxy_workflow": "true",
"annotation": "Assemble long reads with Flye, then view assembly statistics and assembly graph",
"creator": [
{
"class": "Person",
"identifier": "https://orcid.org/0000-0002-9906-0673",
"name": "Anna Syme"
}
],
"format-version": "0.1",
"license": "MIT",
"release": "0.1",
"name": "Genome assembly with Flye",
"report": {
"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n\n"
},
"steps": {
"0": {
"annotation": "Sequence reads e.g. nanopore",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "Sequence reads e.g. nanopore",
"name": "Input sequence reads"
}
],
"label": "Input sequence reads",
"name": "Input dataset",
"outputs": [],
"position": {
"left": 0,
"top": 263.5110244255367
},
"tool_id": null,
"tool_state": "{\"optional\": false, \"tag\": null}",
"tool_version": null,
"type": "data_input",
"uuid": "501d7387-3bed-4510-891f-a4571a48d9ab",
"when": null,
"workflow_outputs": []
},
"1": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/flye/flye/2.9.1+galaxy0",
"errors": null,
"id": 1,
"input_connections": {
"inputs": {
"id": 0,
"output_name": "output"
}
},
"inputs": [],
"label": "Flye: assembly",
"name": "Flye",
"outputs": [
{
"name": "consensus",
"type": "fasta"
},
{
"name": "assembly_graph",
"type": "graph_dot"
},
{
"name": "assembly_gfa",
"type": "txt"
},
{
"name": "assembly_info",
"type": "tabular"
}
],
"position": {
"left": 264.3488280182951,
"top": 139.5081025362239
},
"post_job_actions": {
"RenameDatasetActionassembly_gfa": {
"action_arguments": {
"newname": "Flye output as GFA for bandage"
},
"action_type": "RenameDatasetAction",
"output_name": "assembly_gfa"
}
},
"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/flye/flye/2.9.1+galaxy0",
"tool_shed_repository": {
"changeset_revision": "cb8dfd28c16f",
"name": "flye",
"owner": "bgruening",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"asm\": {\"asm_select\": \"false\", \"__current_case__\": 1}, \"generate_log\": false, \"i\": \"1\", \"inputs\": {\"__class__\": \"ConnectedValue\"}, \"iterations\": \"1\", \"keep_haplotypes\": false, \"m\": null, \"meta\": false, \"min_overlap\": null, \"mode\": \"--nano-raw\", \"mode_conditional\": {\"mode\": \"--nano-raw\", \"__current_case__\": 0}, \"no_alt_contigs\": false, \"no_trestle\": \"false\", \"plasmids\": \"false\", \"scaffold\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.9.1+galaxy0",
"type": "tool",
"uuid": "8bed3591-266a-41f1-91d3-11b3456e8609",
"when": null,
"workflow_outputs": [
{
"label": "Flye assembly (consensus)",
"output_name": "consensus",
"uuid": "3960e31d-7a9e-400c-bb21-f6e47b75e649"
},
{
"label": "Flye assembly (assembly_graph)",
"output_name": "assembly_graph",
"uuid": "e524f295-a957-4c91-838c-f8e98e809b6c"
},
{
"label": "Flye assembly (Graphical Fragment Assembly)",
"output_name": "assembly_gfa",
"uuid": "48b854e2-dd6e-4345-8d05-09abca6659da"
},
{
"label": "Flye assembly (assembly_info)",
"output_name": "assembly_info",
"uuid": "8672c172-71a7-432c-9679-a8e37f36cf53"
}
]
},
"2": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.2.0+galaxy1",
"errors": null,
"id": 2,
"input_connections": {
"mode|in|inputs": {
"id": 1,
"output_name": "consensus"
}
},
"inputs": [],
"label": "Quast genome report",
"name": "Quast",
"outputs": [
{
"name": "report_html",
"type": "html"
}
],
"position": {
"left": 562.9918212890625,
"top": 0
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.2.0+galaxy1",
"tool_shed_repository": {
"changeset_revision": "72472698a2df",
"name": "quast",
"owner": "iuc",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"__job_resource\": {\"__job_resource__select\": \"no\", \"__current_case__\": 0}, \"advanced\": {\"contig_thresholds\": \"0,1000\", \"strict_NA\": false, \"extensive_mis_size\": \"1000\", \"scaffold_gap_max_size\": \"1000\", \"unaligned_part_size\": \"500\", \"skip_unaligned_mis_contigs\": true, \"fragmented_max_indent\": null, \"report_all_metrics\": false, \"x_for_Nx\": \"90\"}, \"al\": {\"ambiguity_score\": \"0.99\", \"ambiguity_usage\": \"one\", \"fragmented\": false, \"fragmented_max_indent\": \"50\", \"min_alignment\": \"65\", \"min_identity\": \"95.0\", \"upper_bound_assembly\": false, \"upper_bound_min_con\": \"2\", \"use_all_alignments\": false}, \"alignments\": {\"use_all_alignments\": false, \"min_alignment\": \"65\", \"ambiguity_usage\": \"one\", \"ambiguity_score\": \"0.99\", \"fragmented\": false, \"upper_bound_assembly\": false, \"upper_bound_min_con\": null, \"local_mis_size\": \"200\"}, \"assembly\": {\"type\": \"genome\", \"__current_case__\": 0, \"ref\": {\"use_ref\": \"false\", \"__current_case__\": 1, \"est_ref_size\": null}, \"orga_type\": \"\", \"min_identity\": \"95.0\"}, \"circos\": \"false\", \"contig_thresholds\": \"0,1000\", \"extensive_mis_size\": \"1000\", \"genes\": {\"gene_finding\": {\"tool\": \"none\", \"__current_case__\": 0}, \"rna_finding\": false, \"conserved_genes_finding\": false}, \"in\": {\"__current_case__\": 1, \"custom\": \"false\", \"inputs\": {\"__class__\": \"RuntimeValue\"}}, \"k_mer\": {\"__current_case__\": 1, \"k_mer_stats\": \"\"}, \"large\": true, \"min_contig\": \"500\", \"mode\": {\"mode\": \"co\", \"__current_case__\": 1, \"in\": {\"custom\": \"false\", \"__current_case__\": 1, \"inputs\": {\"__class__\": \"ConnectedValue\"}}, \"reads\": {\"reads_option\": \"disabled\", \"__current_case__\": 0}}, \"output_files\": \"html\", \"scaffold_gap_max_size\": \"1000\", \"skip_unaligned_mis_contigs\": \"false\", \"split_scaffolds\": false, \"strict_NA\": \"false\", \"unaligned_part_size\": \"500\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "5.2.0+galaxy1",
"type": "tool",
"uuid": "5bc29646-ec4c-4eb3-af01-73d82fc690a2",
"when": null,
"workflow_outputs": [
{
"label": "Quast: HTML report",
"output_name": "report_html",
"uuid": "17cdf8e0-8ad4-4570-afae-1861934fc678"
}
]
},
"3": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fasta_stats/fasta-stats/2.0",
"errors": null,
"id": 3,
"input_connections": {
"fasta": {
"id": 1,
"output_name": "consensus"
}
},
"inputs": [],
"label": "Fasta statistics",
"name": "Fasta Statistics",
"outputs": [
{
"name": "stats_output",
"type": "tabular"
}
],
"position": {
"left": 569.7271118164062,
"top": 189.37097199902308
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fasta_stats/fasta-stats/2.0",
"tool_shed_repository": {
"changeset_revision": "0dbb995c7d35",
"name": "fasta_stats",
"owner": "iuc",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"dataset\": {\"__class__\": \"ConnectedValue\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gaps_option\": false, \"genome_size\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.0",
"type": "tool",
"uuid": "811b5756-e24e-4de3-a3d6-2ee4bf347c1b",
"when": null,
"workflow_outputs": [
{
"label": "Flye assembly statistics",
"output_name": "stats_output",
"uuid": "49f24151-e76c-4d94-8045-4d52bcd9aae5"
}
]
},
"4": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/2022.09+galaxy4",
"errors": null,
"id": 4,
"input_connections": {
"input_file": {
"id": 1,
"output_name": "assembly_gfa"
}
},
"inputs": [],
"label": "Bandage image: Flye assembly",
"name": "Bandage Image",
"outputs": [
{
"name": "outfile",
"type": "jpg"
}
],
"position": {
"left": 571.0624389648438,
"top": 311.80291748046875
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/2022.09+galaxy4",
"tool_shed_repository": {
"changeset_revision": "ddddce450736",
"name": "bandage",
"owner": "iuc",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"fontsize\": null, \"height\": \"1000\", \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"lengths\": false, \"names\": false, \"nodewidth\": null, \"output_format\": \"jpg\", \"width\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2022.09+galaxy4",
"type": "tool",
"uuid": "395f2dfe-3fa5-4684-bf97-7dd12e970f78",
"when": null,
"workflow_outputs": [
{
"label": "Bandage Image: Assembly Graph Image",
"output_name": "outfile",
"uuid": "e66bd129-146f-48dc-95b8-39a2a1ffb68d"
}
]
}
},
"tags": [],
"uuid": "83ef6fdc-b8a9-41af-a824-0225f0199e63",
"version": 21
}
35 changes: 35 additions & 0 deletions workflows/genome-assembly/assembly-with-flye/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Genome assembly with Flye workflow


## Why use this workflow?

- This is a fairly simple workflow that assembles a genome from long sequencing reads.
- It takes in sequencing reads from PacBio (Hifi or non-Hifi), or Oxford Nanopore.
- If you have PacBio Hifi reads, you may prefer to use a workflow with the assembly tool Hifiasm, such as the those in the suite of VGP workflows.

## Inputs

Raw sequencing reads from PacBio or Oxford Nanopore in format:
fasta, fasta.gz, fastq, fastq.gz, fastqsanger.gz or fastqsanger

## What does the workflow do

- Assembles the reads with the tool Flye
- Summarizes the statistics with the tool Fasta statistics
- Report with the tool Quast
- Renders the assembly graph with the tool Bandage

## Settings

Run as-is or change parameters at runtime

For example:
- change the Flye option of "mode" to the correct sequencing type
- change the Quast option for "Type of organism" to correct taxon

## Outputs

- Flye assembly output - four files: fasta, gfa for bandage, graph_dot file, assembly info
- Fasta statistics
- Bandage image
- Quast report
Binary file not shown.

0 comments on commit 5d9e8ee

Please sign in to comment.