From 7f3a8123f1e7f0f445bcf05b81cd51b1e86e1068 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Wed, 17 Jul 2024 09:02:15 -0400 Subject: [PATCH 01/60] feat: introduces WDL format --- .github/workflows/CI.yml | 43 +- Cargo.toml | 70 +- README.md | 4 +- backup/wdl-format-old-2/CHANGELOG.md | 12 + backup/wdl-format-old-2/Cargo.toml | 28 + backup/wdl-format-old-2/src/element.rs | 0 backup/wdl-format-old-2/src/formatter.rs | 131 + backup/wdl-format-old-2/src/import.rs | 174 + backup/wdl-format-old-2/src/lib.rs | 190 + backup/wdl-format-old-2/src/metadata.rs | 365 ++ backup/wdl-format-old-2/src/node.rs | 99 + backup/wdl-format-old-2/src/registry.rs | 373 ++ backup/wdl-format-old-2/src/task.rs | 455 +++ backup/wdl-format-old-2/src/token.rs | 82 + backup/wdl-format-old-2/src/tokens.rs | 2089 +++++++++++ backup/wdl-format-old-2/src/v1.rs | 711 ++++ backup/wdl-format-old-2/src/workflow.rs | 666 ++++ backup/wdl-format-old-2/tests/format.rs | 192 + .../ENCODE-DCC_chip-seq-pipeline/LICENSE.txt | 25 + .../source.formatted.wdl | 1 + .../ENCODE-DCC_chip-seq-pipeline/source.wdl | 3296 +++++++++++++++++ .../clays_complex_script/source.formatted.wdl | 7 + .../format/clays_complex_script/source.wdl | 165 + .../source.formatted.wdl | 1 + .../format/complex_meta_and_calls/source.wdl | 106 + .../source.formatted.wdl | 23 + .../imports_with_both_comments/source.wdl | 23 + .../source.formatted.wdl | 12 + .../imports_with_inline_comments/source.wdl | 12 + .../source.formatted.wdl | 5 + .../imports_with_no_comments/source.wdl | 7 + .../source.formatted.wdl | 23 + .../source.wdl | 23 + .../interrupt_example/source.formatted.wdl | 2 + .../tests/format/interrupt_example/source.wdl | 10 + .../tests/format/seaseq-case/LICENSE.txt | 205 + .../format/seaseq-case/source.formatted.wdl | 17 + .../tests/format/seaseq-case/source.wdl | 898 +++++ backup/wdl-format-old/CHANGELOG.md | 12 + backup/wdl-format-old/Cargo.toml | 28 + backup/wdl-format-old/src/formatter.rs | 131 + backup/wdl-format-old/src/import.rs | 174 + backup/wdl-format-old/src/lib.rs | 190 + backup/wdl-format-old/src/metadata.rs | 365 ++ backup/wdl-format-old/src/task.rs | 455 +++ backup/wdl-format-old/src/v1.rs | 711 ++++ backup/wdl-format-old/src/workflow.rs | 666 ++++ backup/wdl-format-old/tests/format.rs | 192 + .../ENCODE-DCC_chip-seq-pipeline/LICENSE.txt | 25 + .../source.formatted.wdl | 1 + .../ENCODE-DCC_chip-seq-pipeline/source.wdl | 3296 +++++++++++++++++ .../clays_complex_script/source.formatted.wdl | 7 + .../format/clays_complex_script/source.wdl | 165 + .../source.formatted.wdl | 1 + .../format/complex_meta_and_calls/source.wdl | 106 + .../source.formatted.wdl | 23 + .../imports_with_both_comments/source.wdl | 23 + .../source.formatted.wdl | 12 + .../imports_with_inline_comments/source.wdl | 12 + .../source.formatted.wdl | 5 + .../imports_with_no_comments/source.wdl | 7 + .../source.formatted.wdl | 23 + .../source.wdl | 23 + .../interrupt_example/source.formatted.wdl | 2 + .../tests/format/interrupt_example/source.wdl | 10 + .../tests/format/seaseq-case/LICENSE.txt | 205 + .../format/seaseq-case/source.formatted.wdl | 17 + .../tests/format/seaseq-case/source.wdl | 898 +++++ ci/Cargo.toml | 3 + ci/src/main.rs | 46 +- gauntlet/Cargo.toml | 3 + rustfmt.toml | 2 +- wdl-analysis/Cargo.toml | 3 + wdl-analysis/src/analyzer.rs | 1 - wdl-analysis/src/engine.rs | 792 ++++ wdl-analysis/src/eval/v1.rs | 2 +- wdl-analysis/src/graph.rs | 1 - wdl-analysis/src/stdlib.rs | 1710 ++++----- wdl-analysis/src/types/v1.rs | 22 +- wdl-analysis/tests/analysis.rs | 8 + 
wdl-ast/Cargo.toml | 5 + wdl-ast/src/element.rs | 755 ++++ wdl-ast/src/lib.rs | 85 +- wdl-ast/src/registry.rs | 381 ++ wdl-ast/src/v1.rs | 159 +- wdl-ast/src/v1/decls.rs | 384 +- wdl-ast/src/v1/expr.rs | 1199 ++++-- wdl-ast/src/v1/import.rs | 18 + wdl-ast/src/v1/struct.rs | 139 +- wdl-ast/src/v1/task.rs | 530 ++- wdl-ast/src/v1/task/common/container/value.rs | 2 - wdl-ast/src/v1/tokens.rs | 2089 +++++++++++ wdl-ast/src/v1/workflow.rs | 572 ++- wdl-ast/src/visitor.rs | 16 +- wdl-ast/tests/validation.rs | 5 + wdl-config/src/loader.rs | 127 + wdl-format/Cargo.toml | 15 + wdl-format/src/config.rs | 21 + wdl-format/src/config/builder.rs | 61 + wdl-format/src/config/indent.rs | 24 + wdl-format/src/element.rs | 508 +++ wdl-format/src/element/node.rs | 23 + wdl-format/src/lib.rs | 227 ++ wdl-format/src/token.rs | 69 + wdl-format/src/token/post.rs | 118 + wdl-format/src/token/pre.rs | 75 + wdl-format/src/v1.rs | 66 + wdl-format/src/v1/task.rs | 41 + wdl-format/src/v1/workflow.rs | 49 + wdl-format/src/v1/workflow/call.rs | 58 + wdl-grammar/Cargo.toml | 5 + wdl-grammar/src/tree.rs | 356 +- wdl-grammar/tests/parsing.rs | 5 + wdl-lint/Cargo.toml | 3 + .../rules/deprecated_placeholder_option.rs | 2 +- wdl-lint/src/rules/disallowed_input_name.rs | 1 - wdl-lint/src/rules/disallowed_output_name.rs | 1 - wdl-lint/src/rules/double_quotes.rs | 1 - wdl-lint/src/rules/ending_newline.rs | 1 - wdl-lint/src/rules/section_order.rs | 4 +- wdl-lint/tests/lints.rs | 6 + wdl-lsp/Cargo.toml | 3 + wdl/Cargo.toml | 7 +- wdl/examples/explore.rs | 5 + wdl/examples/parse.rs | 2 + wdl/src/bin/wdl.rs | 65 + wdl/src/lib.rs | 3 + 127 files changed, 27587 insertions(+), 1627 deletions(-) create mode 100644 backup/wdl-format-old-2/CHANGELOG.md create mode 100644 backup/wdl-format-old-2/Cargo.toml create mode 100644 backup/wdl-format-old-2/src/element.rs create mode 100644 backup/wdl-format-old-2/src/formatter.rs create mode 100644 backup/wdl-format-old-2/src/import.rs create mode 100644 backup/wdl-format-old-2/src/lib.rs create mode 100644 backup/wdl-format-old-2/src/metadata.rs create mode 100644 backup/wdl-format-old-2/src/node.rs create mode 100644 backup/wdl-format-old-2/src/registry.rs create mode 100644 backup/wdl-format-old-2/src/task.rs create mode 100644 backup/wdl-format-old-2/src/token.rs create mode 100644 backup/wdl-format-old-2/src/tokens.rs create mode 100644 backup/wdl-format-old-2/src/v1.rs create mode 100644 backup/wdl-format-old-2/src/workflow.rs create mode 100644 backup/wdl-format-old-2/tests/format.rs create mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt create mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl create mode 100644 
backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl create mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt create mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl create mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl create mode 100644 backup/wdl-format-old/CHANGELOG.md create mode 100644 backup/wdl-format-old/Cargo.toml create mode 100644 backup/wdl-format-old/src/formatter.rs create mode 100644 backup/wdl-format-old/src/import.rs create mode 100644 backup/wdl-format-old/src/lib.rs create mode 100644 backup/wdl-format-old/src/metadata.rs create mode 100644 backup/wdl-format-old/src/task.rs create mode 100644 backup/wdl-format-old/src/v1.rs create mode 100644 backup/wdl-format-old/src/workflow.rs create mode 100644 backup/wdl-format-old/tests/format.rs create mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt create mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl create mode 100644 backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/clays_complex_script/source.wdl create mode 100644 backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl create mode 100644 backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/interrupt_example/source.wdl create mode 100644 backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt create mode 100644 backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl create mode 100644 backup/wdl-format-old/tests/format/seaseq-case/source.wdl create mode 100644 wdl-analysis/src/engine.rs create mode 100644 wdl-ast/src/element.rs create mode 100644 wdl-ast/src/registry.rs create mode 100644 wdl-ast/src/v1/tokens.rs create mode 100644 wdl-config/src/loader.rs create mode 
100644 wdl-format/Cargo.toml create mode 100644 wdl-format/src/config.rs create mode 100644 wdl-format/src/config/builder.rs create mode 100644 wdl-format/src/config/indent.rs create mode 100644 wdl-format/src/element.rs create mode 100644 wdl-format/src/element/node.rs create mode 100644 wdl-format/src/lib.rs create mode 100644 wdl-format/src/token.rs create mode 100644 wdl-format/src/token/post.rs create mode 100644 wdl-format/src/token/pre.rs create mode 100644 wdl-format/src/v1.rs create mode 100644 wdl-format/src/v1/task.rs create mode 100644 wdl-format/src/v1/workflow.rs create mode 100644 wdl-format/src/v1/workflow/call.rs diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 91829fefc..ad0908303 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,7 +10,7 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update nightly && rustup default nightly - name: Install rustfmt @@ -20,7 +20,7 @@ jobs: lint: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - name: Install clippy @@ -30,7 +30,7 @@ jobs: test: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - run: cargo test --all-features @@ -38,7 +38,7 @@ jobs: test-examples: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - run: cargo test --all-features --examples @@ -46,7 +46,7 @@ jobs: docs: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - run: cargo doc @@ -54,7 +54,7 @@ jobs: gauntlet: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - run: cargo run --release --bin gauntlet @@ -62,21 +62,30 @@ jobs: arena: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Update Rust run: rustup update stable && rustup default stable - run: cargo run --release --bin gauntlet -- --arena + workspace-lints-enabled: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Update Rust + run: rustup update stable && rustup default stable + - run: cargo install cargo-workspace-lints --locked + - run: cargo workspace-lints + msrv: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Update Rust - run: rustup update stable && rustup default stable - - name: Install cargo-binstall - run: curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash - - name: Install cargo-msrv - run: cargo binstall -y --version 0.16.0-beta.23 cargo-msrv - - name: Verify the MSRV - working-directory: ./wdl - run: cargo msrv verify --output-format minimal --all-features + - uses: actions/checkout@v4 + - name: Update Rust + run: rustup update stable && rustup default stable + - name: Install cargo-binstall + run: curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash + - name: Install cargo-msrv + run: cargo binstall -y --version 0.16.0-beta.23 cargo-msrv + - 
name: Verify the MSRV + working-directory: ./wdl + run: cargo msrv verify --output-format minimal --all-features diff --git a/Cargo.toml b/Cargo.toml index c77a6f9d7..1b125c67e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,11 @@ [workspace] -members = [ "ci", +members = [ + "ci", + "gauntlet", "wdl", "wdl-analysis", "wdl-ast", - "gauntlet", + "wdl-format", "wdl-grammar", "wdl-lint", "wdl-lsp", @@ -19,40 +21,56 @@ repository = "https://github.com/stjude-rust-labs/wdl" rust-version = "1.80.0" [workspace.dependencies] +anyhow = "1.0.86" +approx = "0.5.1" clap = { version = "4.5.7", features = ["derive"] } +clap-verbosity-flag = "2.2.1" +codespan-reporting = "0.11.1" colored = "2.1.0" convert_case = "0.6.0" +dirs = "5.0.1" +faster-hex = "0.9.0" +futures = "0.3.30" +git2 = "0.18.3" +id-arena = "2.2.1" indexmap = { version = "2.2.6", features = ["serde"] } +indicatif = "0.17.8" +itertools = "0.13.0" +line-index = "0.1.1" +logos = "0.14.0" +nonempty = "0.10.0" +parking_lot = "0.12.3" +path-clean = "1.0.1" +petgraph = "0.6.5" +pretty_assertions = "1.4.0" +rayon = "1.10.0" +reqwest = "0.12.5" +rowan = "0.15.15" serde = { version = "1", features = ["derive"] } +serde_json = "1.0.120" serde_with = "3.8.1" +tempfile = "3.10.1" tokio = { version = "1.38.0", features = ["full"] } toml = "0.8.14" +tower-lsp = "0.20.0" tracing = "0.1.40" +tracing-log = "0.2.0" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -logos = "0.14.0" -rowan = "0.15.15" -pretty_assertions = "1.4.0" -rayon = "1.10.0" -approx = "0.5.1" -codespan-reporting = "0.11.1" -anyhow = "1.0.86" -dirs = "5.0.1" -faster-hex = "0.9.0" -git2 = "0.18.3" -tempfile = "3.10.1" url = "2.5.2" urlencoding = "2.1.3" -parking_lot = "0.12.3" -reqwest = "0.12.5" -petgraph = "0.6.5" -futures = "0.3.30" -walkdir = "2.5.0" -path-clean = "1.0.1" -indicatif = "0.17.8" -tower-lsp = "0.20.0" -line-index = "0.1.1" -serde_json = "1.0.120" uuid = "1.10.0" -id-arena = "2.2.1" -clap-verbosity-flag = "2.2.1" -tracing-log = "0.2.0" +walkdir = "2.5.0" + +[workspace.lints.rust] +missing_docs = "warn" +nonstandard-style = "warn" +rust-2018-idioms = "warn" +rust-2021-compatibility = "warn" +rust-2024-compatibility = "warn" +edition_2024_expr_fragment_specifier = "allow" + +[workspace.lints.rustdoc] +broken_intra_doc_links = "warn" + +[workspace.lints.clippy] +missing_docs_in_private_items = "warn" diff --git a/README.md b/README.md index c7142c672..7c0269b30 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ cargo run --bin wdl --features cli -- $ARGS Where `$ARGS` are the command line arguments to the `wdl` CLI tool. -The `wdl` CLI tool currently supports three subcommands: +The `wdl` CLI tool currently supports the following subcommands: * `parse` - Parses a WDL document and prints both the parse diagnostics and the resulting Concrete Syntax Tree (CST). @@ -140,6 +140,8 @@ The `wdl` CLI tool currently supports three subcommands: document scopes and exits with a status code of `0` if the documents are valid; otherwise, prints the validation diagnostics and exits with a status code of `1`. +* `format` - Parses, validates, and then formats a single WDL document, printing + the result to STDOUT. Each of the subcommands supports passing `-` as the file path to denote reading from STDIN instead of a file on disk. 
diff --git a/backup/wdl-format-old-2/CHANGELOG.md b/backup/wdl-format-old-2/CHANGELOG.md
new file mode 100644
index 000000000..3eeeadd81
--- /dev/null
+++ b/backup/wdl-format-old-2/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+### Added
+
+* Added the `wdl-format` crate for formatting WDL documents ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)).
diff --git a/backup/wdl-format-old-2/Cargo.toml b/backup/wdl-format-old-2/Cargo.toml
new file mode 100644
index 000000000..eec06496c
--- /dev/null
+++ b/backup/wdl-format-old-2/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "wdl-format"
+version = "0.1.0"
+license.workspace = true
+edition.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+wdl-ast = { path = "../wdl-ast", version = "0.5.0" }
+wdl-grammar = { version = "0.6.0", path = "../wdl-grammar" }
+
+[dev-dependencies]
+pretty_assertions = { workspace = true }
+approx = { workspace = true }
+rayon = { workspace = true }
+colored = { workspace = true }
+codespan-reporting = { workspace = true }
+
+[features]
+codespan = ["wdl-ast/codespan"]
+
+[[test]]
+name = "format"
+required-features = ["codespan"]
+harness = false
diff --git a/backup/wdl-format-old-2/src/element.rs b/backup/wdl-format-old-2/src/element.rs
new file mode 100644
index 000000000..e69de29bb
diff --git a/backup/wdl-format-old-2/src/formatter.rs b/backup/wdl-format-old-2/src/formatter.rs
new file mode 100644
index 000000000..6ef467b44
--- /dev/null
+++ b/backup/wdl-format-old-2/src/formatter.rs
@@ -0,0 +1,131 @@
+//! Contains the `Formatter` struct, which is used to keep track of the
+//! current formatting state. This includes the current indentation level and
+//! whether the current line has been interrupted by comments.
+//! The state becomes "interrupted" by comments when a comment forces a newline
+//! where it would otherwise not be expected. In this case, the next line(s)
+//! will be indented by one level.
+
+use crate::Formattable;
+use crate::NEWLINE;
+
+/// Space constant used for formatting.
+pub const SPACE: &str = " ";
+/// Indentation constant used for formatting. Indentation is four spaces
+/// per-level.
+pub const INDENT: &str = "    ";
+/// Inline comment space constant used for formatting.
+///
+/// Inline comments should start two spaces after the end of the element they
+/// are commenting on.
+pub const INLINE_COMMENT_SPACE: &str = "  ";
+
+/// The `Formatter` struct is used to keep track of the current formatting
+/// state. This includes the current indentation level and whether the current
+/// line has been interrupted by comments.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct Formatter {
+    /// The current indentation level.
+    indent_level: usize,
+    /// Whether the current line has been interrupted by comments.
+    interrupted_by_comments: bool,
+}
+
+impl Formatter {
+    /// Format an element.
+    pub fn format<T: Formattable, F: std::fmt::Write>(
+        mut self,
+        element: &T,
+        writer: &mut F,
+    ) -> std::fmt::Result {
+        element.format(writer, &mut self)
+    }
+
+    /// Add the current indentation to the writer.
+    /// The indentation level will be temporarily increased by one if the
+    /// current line has been interrupted by comments.
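+    /// For example, at `indent_level == 2` with no interruption this writes
+    /// `INDENT.repeat(2)`, i.e. eight spaces; if the line has been interrupted
+    /// by comments, one extra level (four more spaces) is written.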
+    pub fn indent<T: std::fmt::Write>(&self, writer: &mut T) -> std::fmt::Result {
+        write!(
+            writer,
+            "{}",
+            INDENT.repeat(self.indent_level + (if self.interrupted_by_comments { 1 } else { 0 }))
+        )
+    }
+
+    /// Add a space or an indentation to the writer. If the current line has
+    /// been interrupted by comments, an indentation is added. Otherwise, a
+    /// space is added.
+    pub fn space_or_indent<T: std::fmt::Write>(&mut self, writer: &mut T) -> std::fmt::Result {
+        if !self.interrupted_by_comments {
+            write!(writer, "{}", SPACE)?;
+        } else {
+            self.indent(writer)?;
+        }
+        self.reset_interrupted();
+        Ok(())
+    }
+
+    /// Add a level of indentation.
+    pub fn increment_indent(&mut self) {
+        self.indent_level += 1;
+        self.reset_interrupted();
+    }
+
+    /// Remove a level of indentation.
+    pub fn decrement_indent(&mut self) {
+        self.indent_level = self.indent_level.saturating_sub(1);
+        self.reset_interrupted();
+    }
+
+    /// Check if the current line has been interrupted by comments.
+    pub fn interrupted(&self) -> bool {
+        self.interrupted_by_comments
+    }
+
+    /// Interrupt the current line with comments.
+    pub fn interrupt(&mut self) {
+        self.interrupted_by_comments = true;
+    }
+
+    /// Reset the interrupted state.
+    pub fn reset_interrupted(&mut self) {
+        self.interrupted_by_comments = false;
+    }
+
+    /// Write any comments that precede an element, interrupting the current
+    /// line first if needed.
+    pub fn format_preceding_trivia<F: std::fmt::Write>(
+        &mut self,
+        writer: &mut F,
+        comments: Box<[String]>,
+        would_be_interrupting: bool,
+        respect_blank_lines: bool,
+    ) -> std::fmt::Result {
+        if would_be_interrupting && !comments.is_empty() && !self.interrupted_by_comments {
+            write!(writer, "{}", NEWLINE)?;
+            self.interrupt();
+        }
+        for comment in comments {
+            if !respect_blank_lines && !comment.starts_with('#') {
+                continue;
+            }
+            self.indent(writer)?;
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+        Ok(())
+    }
+
+    /// Write an element's inline comment (if any), or a newline when one is
+    /// required instead.
+    pub fn format_inline_comment<F: std::fmt::Write>(
+        &mut self,
+        writer: &mut F,
+        comment: Option<String>,
+        would_be_interrupting: bool,
+    ) -> std::fmt::Result {
+        if let Some(ref comment) = comment {
+            write!(writer, "{}{}{}", INLINE_COMMENT_SPACE, comment, NEWLINE)?;
+        }
+        if would_be_interrupting && comment.is_some() {
+            self.interrupt();
+        } else if !would_be_interrupting && comment.is_none() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+        Ok(())
+    }
+}
diff --git a/backup/wdl-format-old-2/src/import.rs b/backup/wdl-format-old-2/src/import.rs
new file mode 100644
index 000000000..5cc1cb96b
--- /dev/null
+++ b/backup/wdl-format-old-2/src/import.rs
@@ -0,0 +1,174 @@
+//! Format import statements.
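+//!
+//! As a usage sketch (illustrative only; `format_document` is the public
+//! entry point in `lib.rs`, and `sort_imports` below supplies the ordering),
+//! imports are emitted sorted by URI and namespace:
+//!
+//! ```rust,ignore
+//! let formatted = wdl_format::format_document(
+//!     "version 1.1\nimport \"b.wdl\"\nimport \"a.wdl\"\n",
+//! )
+//! .expect("document should format");
+//! // `a.wdl` should now appear before `b.wdl` in the output.
+//! assert!(formatted.find("a.wdl").unwrap() < formatted.find("b.wdl").unwrap());
+//! ```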
+ +use wdl_ast::token_child; +use wdl_ast::v1::AliasKeyword; +use wdl_ast::v1::AsKeyword; +use wdl_ast::v1::ImportAlias; +use wdl_ast::v1::ImportKeyword; +use wdl_ast::v1::ImportStatement; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::Ident; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; +use wdl_grammar::SyntaxExt; + +use crate::Formattable; +use crate::Formatter; + +impl Formattable for ImportKeyword { + fn format( + &self, + writer: &mut T, + _formatter: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.as_str()) + } +} + +impl Formattable for AsKeyword { + fn format( + &self, + writer: &mut T, + _formatter: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.as_str()) + } +} + +impl Formattable for AliasKeyword { + fn format( + &self, + writer: &mut T, + _formatter: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.as_str()) + } +} + +impl Formattable for ImportAlias { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?; + + let alias_keyword = self.alias_keyword(); + formatter.space_or_indent(writer)?; + alias_keyword.format(writer, formatter)?; + formatter.format_inline_comment(writer, alias_keyword.syntax().inline_comment(), true)?; + + let (source, target) = self.names(); + + formatter.format_preceding_trivia( + writer, + source.syntax().preceding_trivia(), + true, + false, + )?; + formatter.space_or_indent(writer)?; + source.format(writer, formatter)?; + formatter.format_inline_comment(writer, source.syntax().inline_comment(), true)?; + + let as_keyword = self.as_keyword(); + formatter.format_preceding_trivia( + writer, + as_keyword.syntax().preceding_trivia(), + true, + false, + )?; + formatter.space_or_indent(writer)?; + as_keyword.format(writer, formatter)?; + formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?; + + formatter.format_preceding_trivia( + writer, + target.syntax().preceding_trivia(), + true, + false, + )?; + formatter.space_or_indent(writer)?; + target.format(writer, formatter)?; + + formatter.format_inline_comment(writer, self.syntax().inline_comment(), true) + } +} + +impl Formattable for ImportStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + formatter.format_preceding_trivia( + writer, + self.syntax().preceding_trivia(), + false, + false, + )?; + + let import_keyword = self.keyword(); + import_keyword.format(writer, formatter)?; + formatter.format_inline_comment(writer, import_keyword.syntax().inline_comment(), true)?; + + let uri = self.uri(); + formatter.format_preceding_trivia(writer, uri.syntax().preceding_trivia(), true, false)?; + formatter.space_or_indent(writer)?; + uri.format(writer, formatter)?; + formatter.format_inline_comment(writer, uri.syntax().inline_comment(), true)?; + + let as_keyword = token_child::(self.syntax()); + if let Some(as_keyword) = as_keyword { + formatter.format_preceding_trivia( + writer, + as_keyword.syntax().preceding_trivia(), + true, + false, + )?; + formatter.space_or_indent(writer)?; + as_keyword.format(writer, formatter)?; + formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?; + + let ident = self + .explicit_namespace() + .expect("import with as clause should have an explicit namespace"); + formatter.format_preceding_trivia( + writer, + ident.syntax().preceding_trivia(), + true, + 
false, + )?; + formatter.space_or_indent(writer)?; + ident.format(writer, formatter)?; + formatter.format_inline_comment(writer, ident.syntax().inline_comment(), true)?; + } + + for alias in self.aliases() { + alias.format(writer, formatter)?; + } + + formatter.format_inline_comment(writer, self.syntax().inline_comment(), false) + } +} + +/// Sorts import statements by their core components. +/// +/// The core components of an import statement are the URI and the namespace. +/// These two elements guarantee a unique import statement. +pub fn sort_imports(a: &ImportStatement, b: &ImportStatement) -> std::cmp::Ordering { + ( + a.uri() + .text() + .expect("import URI cannot have placeholders") + .as_str(), + &a.namespace().expect("import namespace should exist").0, + ) + .cmp(&( + b.uri() + .text() + .expect("import URI cannot have placeholders") + .as_str(), + &b.namespace().expect("import namespace should exist").0, + )) +} diff --git a/backup/wdl-format-old-2/src/lib.rs b/backup/wdl-format-old-2/src/lib.rs new file mode 100644 index 000000000..283a2c77f --- /dev/null +++ b/backup/wdl-format-old-2/src/lib.rs @@ -0,0 +1,190 @@ +//! A library for auto-formatting WDL code. + +#![warn(missing_docs)] +#![warn(rust_2018_idioms)] +#![warn(rust_2021_compatibility)] +#![warn(missing_debug_implementations)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(rustdoc::broken_intra_doc_links)] + +use anyhow::Result; +use wdl_ast::token_child; +use wdl_ast::v1::VersionKeyword; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::Diagnostic; +use wdl_ast::Document; +use wdl_ast::Ident; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; +use wdl_ast::SyntaxNode; +use wdl_ast::Validator; +use wdl_ast::Version; +use wdl_ast::VersionStatement; +use wdl_grammar::SyntaxExt; + +mod formatter; +mod import; +// mod metadata; +mod task; +mod v1; +mod workflow; + +use formatter::Formatter; + +/// Newline constant used for formatting on windows platforms. +#[cfg(windows)] +pub const NEWLINE: &str = "\r\n"; +/// Newline constant used for formatting on non-windows platforms. +#[cfg(not(windows))] +pub const NEWLINE: &str = "\n"; +/// String terminator constant used for formatting. +const STRING_TERMINATOR: char = '"'; +/// Lint directive prefix constant used for formatting. +const LINT_DIRECTIVE_PREFIX: &str = "#@"; + +/// A trait for elements that can be formatted. +pub trait Formattable { + /// Format the element and write it to the writer. 
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result;
+}
+
+impl Formattable for VersionKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for Version {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for VersionStatement {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        let mut preamble_comments = Vec::new();
+        let mut lint_directives = Vec::new();
+
+        for comment in self.syntax().preceding_trivia() {
+            if comment.starts_with(LINT_DIRECTIVE_PREFIX) {
+                lint_directives.push(comment);
+            } else if comment.starts_with('#') {
+                preamble_comments.push(comment);
+            } // else is just a newline
+        }
+
+        for comment in preamble_comments.iter() {
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+
+        // If there are preamble comments, ensure a blank line is inserted
+        if !preamble_comments.is_empty() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+
+        for comment in lint_directives.iter() {
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+
+        let version_keyword = self.keyword();
+        version_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, version_keyword.syntax().inline_comment(), true)?;
+
+        let version = self.version();
+        formatter.format_preceding_trivia(
+            writer,
+            version.syntax().preceding_trivia(),
+            true,
+            false,
+        )?;
+        formatter.space_or_indent(writer)?;
+        version.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, self.syntax().inline_comment(), false)
+    }
+}
+
+impl Formattable for Ident {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for Document {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        let ast = self.ast();
+        let ast = ast.as_v1().expect("document should be a v1 document");
+        let version_statement = self
+            .version_statement()
+            .expect("document should have a version statement");
+        version_statement.format(writer, formatter)?;
+        let mut imports = ast.imports().collect::<Vec<_>>();
+        if !imports.is_empty() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+        imports.sort_by(import::sort_imports);
+        for import in imports {
+            import.format(writer, formatter)?;
+        }
+        for item in ast.items() {
+            if item.syntax().kind() == SyntaxKind::ImportStatementNode {
+                continue;
+            }
+            // write!(writer, "{}", NEWLINE)?;
+            // item.format(writer, formatter)?;
+        }
+        Ok(())
+    }
+}
+
+/// Format a WDL document.
+pub fn format_document(code: &str) -> Result<String, Vec<Diagnostic>> {
+    let (document, diagnostics) = Document::parse(code);
+    if !diagnostics.is_empty() {
+        return Err(diagnostics);
+    }
+    let mut validator = Validator::default();
+    match validator.validate(&document) {
+        std::result::Result::Ok(_) => {
+            // The document is valid, so we can format it.
+ } + Err(diagnostics) => return Err(diagnostics), + } + + let mut result = String::new(); + let formatter = &mut Formatter::default(); + + match formatter.format(&document, &mut result) { + Ok(_) => {} + Err(error) => { + let msg = format!("Failed to format document: {}", error); + return Err(vec![Diagnostic::error(msg)]); + } + } + + Ok(result) +} diff --git a/backup/wdl-format-old-2/src/metadata.rs b/backup/wdl-format-old-2/src/metadata.rs new file mode 100644 index 000000000..9bb557f29 --- /dev/null +++ b/backup/wdl-format-old-2/src/metadata.rs @@ -0,0 +1,365 @@ +//! A module for formatting metadata sections (meta and parameter_meta). + +use wdl_ast::v1::LiteralNull; +use wdl_ast::v1::MetadataArray; +use wdl_ast::v1::MetadataObject; +use wdl_ast::v1::MetadataObjectItem; +use wdl_ast::v1::MetadataSection; +use wdl_ast::v1::MetadataValue; +use wdl_ast::v1::ParameterMetadataSection; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; + +use super::comments::format_inline_comment; +use super::comments::format_preceding_comments; +use super::first_child_of_kind; +use super::format_element_with_comments; +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::LinePosition; +use super::NEWLINE; + +impl Formattable for LiteralNull { + fn format( + &self, + writer: &mut T, + _state: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.syntax()) + } +} + +impl Formattable for MetadataObject { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + format_element_with_comments( + &open_brace, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + let mut commas = self + .syntax() + .children_with_tokens() + .filter(|c| c.kind() == SyntaxKind::Comma); + + for item in self.items() { + item.format(writer, formatter)?; + if let Some(cur_comma) = commas.next() { + format_element_with_comments( + &cur_comma, + writer, + formatter, + LinePosition::End, + |_, _| Ok(()), + )?; + } else { + // No trailing comma was in the input + write!(writer, ",")?; + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + format_preceding_comments(&close_brace, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_brace)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataArray { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let open_bracket = first_child_of_kind(self.syntax(), SyntaxKind::OpenBracket); + format_element_with_comments( + &open_bracket, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + let mut commas = self + .syntax() + .children_with_tokens() + 
.filter(|c| c.kind() == SyntaxKind::Comma); + + for item in self.elements() { + formatter.indent(writer)?; + item.format(writer, formatter)?; + if let Some(cur_comma) = commas.next() { + format_element_with_comments( + &cur_comma, + writer, + formatter, + LinePosition::End, + |_, _| Ok(()), + )?; + } else { + // No trailing comma was in the input + write!(writer, ",")?; + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_bracket = first_child_of_kind(self.syntax(), SyntaxKind::CloseBracket); + format_preceding_comments(&close_bracket, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_bracket)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataValue { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + match self { + MetadataValue::String(s) => s.format(writer, formatter), + MetadataValue::Boolean(b) => b.format(writer, formatter), + MetadataValue::Float(f) => f.format(writer, formatter), + MetadataValue::Integer(i) => i.format(writer, formatter), + MetadataValue::Null(n) => n.format(writer, formatter), + MetadataValue::Object(o) => o.format(writer, formatter), + MetadataValue::Array(a) => a.format(writer, formatter), + } + } +} + +impl Formattable for MetadataObjectItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let name = self.name(); + formatter.indent(writer)?; + name.format(writer, formatter)?; + format_inline_comment( + &SyntaxElement::from(name.syntax().clone()), + writer, + formatter, + true, + )?; + + let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + format_element_with_comments( + &colon, + writer, + formatter, + LinePosition::Middle, + |writer, formatter| { + if formatter.interrupted() { + formatter.indent(writer)?; + formatter.reset_interrupted(); + } + Ok(()) + }, + )?; + + let value = self.value(); + format_preceding_comments( + &SyntaxElement::from(value.syntax().clone()), + writer, + formatter, + true, + )?; + formatter.space_or_indent(writer)?; + value.format(writer, formatter)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let meta_keyword = first_child_of_kind(self.syntax(), SyntaxKind::MetaKeyword); + formatter.indent(writer)?; + write!(writer, "{}", meta_keyword)?; + format_inline_comment(&meta_keyword, writer, formatter, true)?; + + let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + format_element_with_comments( + &open_brace, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } else { + write!(writer, "{}", SPACE)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + for item in self.items() { + item.format(writer, formatter)?; + if formatter.interrupted() { + formatter.reset_interrupted(); + } else { + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_brace = 
first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        format_preceding_comments(&close_brace, writer, formatter, false)?;
+        formatter.indent(writer)?;
+        write!(writer, "{}", close_brace)?;
+        format_inline_comment(
+            &SyntaxElement::from(self.syntax().clone()),
+            writer,
+            formatter,
+            false,
+        )
+    }
+}
+
+impl Formattable for ParameterMetadataSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        format_preceding_comments(
+            &SyntaxElement::from(self.syntax().clone()),
+            writer,
+            formatter,
+            false,
+        )?;
+
+        let parameter_meta_keyword =
+            first_child_of_kind(self.syntax(), SyntaxKind::ParameterMetaKeyword);
+        formatter.indent(writer)?;
+        write!(writer, "{}", parameter_meta_keyword)?;
+        format_inline_comment(&parameter_meta_keyword, writer, formatter, true)?;
+
+        let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        format_element_with_comments(
+            &open_brace,
+            writer,
+            formatter,
+            LinePosition::End,
+            |writer, formatter| {
+                if formatter.interrupted() {
+                    formatter.reset_interrupted();
+                    formatter.indent(writer)?;
+                } else {
+                    write!(writer, "{}", SPACE)?;
+                }
+                Ok(())
+            },
+        )?;
+
+        formatter.increment_indent();
+
+        for item in self.items() {
+            item.format(writer, formatter)?;
+            if formatter.interrupted() {
+                formatter.reset_interrupted();
+            } else {
+                write!(writer, "{}", NEWLINE)?;
+            }
+        }
+
+        formatter.decrement_indent();
+
+        let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        format_preceding_comments(&close_brace, writer, formatter, false)?;
+        formatter.indent(writer)?;
+        write!(writer, "{}", close_brace)?;
+        format_inline_comment(
+            &SyntaxElement::from(self.syntax().clone()),
+            writer,
+            formatter,
+            false,
+        )
+    }
+}
diff --git a/backup/wdl-format-old-2/src/node.rs b/backup/wdl-format-old-2/src/node.rs
new file mode 100644
index 000000000..e44d720c2
--- /dev/null
+++ b/backup/wdl-format-old-2/src/node.rs
@@ -0,0 +1,99 @@
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::SyntaxElement;
+use wdl_ast::Token;
+use wdl_ast::WorkflowDescriptionLanguage;
+
+use crate::TokenStream;
+use crate::Writable;
+
+type DynAstNode<'a> = &'a dyn AstNode<Language = WorkflowDescriptionLanguage>;
+type DynAstToken<'a> = &'a dyn AstToken;
+
+pub struct FormatNode<'a>(DynAstNode<'a>);
+
+impl<'a> FormatNode<'a> {
+    pub fn new<T: AstNode<Language = WorkflowDescriptionLanguage> + 'a>(value: &'a T) -> Self {
+        Self(value as DynAstNode)
+    }
+
+    pub fn collate(&self) -> FormatElement<'_> {}
+}
+
+pub trait AstNodeFormatExt: AstNode<Language = WorkflowDescriptionLanguage> {
+    fn as_format_node(&self) -> FormatNode<'_>
+    where
+        Self: Sized,
+    {
+        FormatNode::new(self)
+    }
+}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage>> AstNodeFormatExt for T {}
+
+pub struct FormatToken<'a>(DynAstToken<'a>);
+
+impl<'a> FormatToken<'a> {
+    pub fn new<T: AstToken + 'a>(value: &'a T) -> Self {
+        Self(value as DynAstToken)
+    }
+}
+
+pub trait AstTokenFormatExt: AstToken {
+    fn as_format_token(&self) -> FormatToken<'_>
+    where
+        Self: Sized,
+    {
+        FormatToken::new(self)
+    }
+}
+
+impl<T: AstToken> AstTokenFormatExt for T {}
+
+impl<'a> Writable<'a> for FormatToken<'a> {
+    fn write(&self, stream: &mut TokenStream<'a>) {
+        stream.write(self.0.as_str());
+    }
+}
+
+pub enum FormatElement<'a> {
+    Node(FormatNode<'a>),
+    Token(FormatToken<'a>),
+}
+
+impl From<Token> for FormatElement<'_> {
+    fn from(value: Token) -> Self {}
+}
+
+#[cfg(test)]
+mod tests {
+    use wdl_ast::Document;
+
+    use crate::node::AstNodeFormatExt as _;
+
+    #[test]
+    fn smoke() {
+        let (document, diagnostics) = Document::parse(
+            "version 1.2
+
+# This is a comment attached to the task.
+task foo # This is an inline comment.
+{
+
+}
+
+# This is a comment attached to the workflow.
+workflow bar # This is inline with the workflow
+{
+    # This is attached to the call.
+    call foo {}
+}",
+        );
+
+        assert!(diagnostics.is_empty());
+
+        let ast = document.ast();
+        let ast = ast.as_v1().unwrap();
+        let node = ast.as_format_node();
+    }
+}
diff --git a/backup/wdl-format-old-2/src/registry.rs b/backup/wdl-format-old-2/src/registry.rs
new file mode 100644
index 000000000..6a6b1cec8
--- /dev/null
+++ b/backup/wdl-format-old-2/src/registry.rs
@@ -0,0 +1,373 @@
+//! The format node registry.
+//!
+//! The format entity registry was introduced solely to ensure that all nodes
+//! and tokens in the concrete syntax tree have one and _only_ one analogous
+//! format entity.
+//!
+//! Ensuring this statically matters because formatting relies on that
+//! one-to-one mapping between elements of the two kinds of trees: formatting
+//! works by traversing the CST of a WDL document and attempting to cast each
+//! element to its corresponding format element.
+//!
+//! Furthermore, this is simply a good invariant to uphold: it keeps the code
+//! straightforward to reason about (a CST element that can map to multiple
+//! different formattable elements in different contexts is inherently
+//! confusing).
+
+use std::any::type_name;
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+use wdl_ast::v1;
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::Comment;
+use wdl_ast::Ident;
+use wdl_ast::SyntaxKind;
+use wdl_ast::Version;
+use wdl_ast::VersionStatement;
+use wdl_ast::Whitespace;
+use wdl_grammar::WorkflowDescriptionLanguage;
+use wdl_grammar::ALL_SYNTAX_KIND;
+
+/// A private module for sealed traits.
+///
+/// The traits are sealed to reserve the right to change their implementations
+/// in the future without introducing breaking changes.
+mod private {
+    /// The sealed trait for [`AstNodeRegistrant`](super::AstNodeRegistrant).
+    pub trait SealedNode {}
+
+    /// The sealed trait for [`AstTokenRegistrant`](super::AstTokenRegistrant).
+    pub trait SealedToken {}
+}
+
+/// A registry of all known mappings between format elements (individual Rust
+/// types that implement the [`AstNode`] trait or [`AstToken`] trait) and the
+/// CST elements they can be cast from (via [`SyntaxKind`]\(s)).
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping
+/// with CST element kinds.
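+///
+/// For example (an illustrative sketch of the mapping this registry stores;
+/// the exact kind names come from `wdl-grammar`):
+///
+/// ```rust,ignore
+/// // `register()` pairs a type name with every `SyntaxKind` it can cast from,
+/// // e.g. `v1::TaskDefinition` maps to a single node kind.
+/// let (name, kinds) = v1::TaskDefinition::register();
+/// assert!(name.ends_with("TaskDefinition"));
+/// assert_eq!(kinds.len(), 1);
+/// ```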
+static REGISTRY: LazyLock>> = LazyLock::new(|| { + let types = vec![ + Comment::register(), + Ident::register(), + v1::AccessExpr::register(), + v1::AdditionExpr::register(), + v1::AfterKeyword::register(), + v1::AliasKeyword::register(), + v1::ArrayType::register(), + v1::ArrayTypeKeyword::register(), + v1::AsKeyword::register(), + v1::Assignment::register(), + v1::Ast::register(), + v1::Asterisk::register(), + v1::BooleanTypeKeyword::register(), + v1::BoundDecl::register(), + v1::CallAfter::register(), + v1::CallAlias::register(), + v1::CallExpr::register(), + v1::CallInputItem::register(), + v1::CallKeyword::register(), + v1::CallStatement::register(), + v1::CallTarget::register(), + v1::CloseBrace::register(), + v1::CloseBracket::register(), + v1::CloseHeredoc::register(), + v1::CloseParen::register(), + v1::Colon::register(), + v1::Comma::register(), + v1::CommandKeyword::register(), + v1::CommandSection::register(), + v1::CommandText::register(), + v1::ConditionalStatement::register(), + v1::DefaultOption::register(), + v1::DirectoryTypeKeyword::register(), + v1::DivisionExpr::register(), + v1::Dot::register(), + v1::DoubleQuote::register(), + v1::ElseKeyword::register(), + v1::Equal::register(), + v1::EqualityExpr::register(), + v1::Exclamation::register(), + v1::Exponentiation::register(), + v1::ExponentiationExpr::register(), + v1::FalseKeyword::register(), + v1::FileTypeKeyword::register(), + v1::Float::register(), + v1::FloatTypeKeyword::register(), + v1::Greater::register(), + v1::GreaterEqual::register(), + v1::GreaterEqualExpr::register(), + v1::GreaterExpr::register(), + v1::HintsItem::register(), + v1::HintsKeyword::register(), + v1::HintsSection::register(), + v1::IfExpr::register(), + v1::IfKeyword::register(), + v1::ImportAlias::register(), + v1::ImportKeyword::register(), + v1::ImportStatement::register(), + v1::IndexExpr::register(), + v1::InequalityExpr::register(), + v1::InKeyword::register(), + v1::InputKeyword::register(), + v1::InputSection::register(), + v1::Integer::register(), + v1::IntTypeKeyword::register(), + v1::Less::register(), + v1::LessEqual::register(), + v1::LessEqualExpr::register(), + v1::LessExpr::register(), + v1::LiteralArray::register(), + v1::LiteralBoolean::register(), + v1::LiteralFloat::register(), + v1::LiteralHints::register(), + v1::LiteralHintsItem::register(), + v1::LiteralInput::register(), + v1::LiteralInputItem::register(), + v1::LiteralInteger::register(), + v1::LiteralMap::register(), + v1::LiteralMapItem::register(), + v1::LiteralNone::register(), + v1::LiteralNull::register(), + v1::LiteralObject::register(), + v1::LiteralObjectItem::register(), + v1::LiteralOutput::register(), + v1::LiteralOutputItem::register(), + v1::LiteralPair::register(), + v1::LiteralString::register(), + v1::LiteralStruct::register(), + v1::LiteralStructItem::register(), + v1::LogicalAnd::register(), + v1::LogicalAndExpr::register(), + v1::LogicalNotExpr::register(), + v1::LogicalOr::register(), + v1::LogicalOrExpr::register(), + v1::MapType::register(), + v1::MapTypeKeyword::register(), + v1::MetadataArray::register(), + v1::MetadataObject::register(), + v1::MetadataObjectItem::register(), + v1::MetadataSection::register(), + v1::MetaKeyword::register(), + v1::Minus::register(), + v1::ModuloExpr::register(), + v1::MultiplicationExpr::register(), + v1::NameRef::register(), + v1::NegationExpr::register(), + v1::NoneKeyword::register(), + v1::NotEqual::register(), + v1::NullKeyword::register(), + v1::ObjectKeyword::register(), + v1::ObjectType::register(), 
+        v1::ObjectTypeKeyword::register(),
+        v1::OpenBrace::register(),
+        v1::OpenBracket::register(),
+        v1::OpenHeredoc::register(),
+        v1::OpenParen::register(),
+        v1::OutputKeyword::register(),
+        v1::OutputSection::register(),
+        v1::PairType::register(),
+        v1::PairTypeKeyword::register(),
+        v1::ParameterMetadataSection::register(),
+        v1::ParameterMetaKeyword::register(),
+        v1::ParenthesizedExpr::register(),
+        v1::Percent::register(),
+        v1::Placeholder::register(),
+        v1::PlaceholderOpen::register(),
+        v1::Plus::register(),
+        v1::PrimitiveType::register(),
+        v1::QuestionMark::register(),
+        v1::RequirementsItem::register(),
+        v1::RequirementsKeyword::register(),
+        v1::RequirementsSection::register(),
+        v1::RuntimeItem::register(),
+        v1::RuntimeKeyword::register(),
+        v1::RuntimeSection::register(),
+        v1::ScatterKeyword::register(),
+        v1::ScatterStatement::register(),
+        v1::SepOption::register(),
+        v1::SingleQuote::register(),
+        v1::Slash::register(),
+        v1::StringText::register(),
+        v1::StringTypeKeyword::register(),
+        v1::StructDefinition::register(),
+        v1::StructKeyword::register(),
+        v1::SubtractionExpr::register(),
+        v1::TaskDefinition::register(),
+        v1::TaskKeyword::register(),
+        v1::ThenKeyword::register(),
+        v1::TrueFalseOption::register(),
+        v1::TrueKeyword::register(),
+        v1::TypeRef::register(),
+        v1::UnboundDecl::register(),
+        v1::Unknown::register(),
+        v1::VersionKeyword::register(),
+        v1::WorkflowDefinition::register(),
+        v1::WorkflowKeyword::register(),
+        Version::register(),
+        VersionStatement::register(),
+        Whitespace::register(),
+    ];
+
+    let mut result = HashMap::new();
+
+    // NOTE: this is done this way instead of simply collecting into a
+    // [`HashMap`] to ensure on the fly that no keys are duplicated.
+    for (r#type, kinds) in types {
+        if result.contains_key(&r#type) {
+            panic!("the `{:?}` key is duplicated", r#type);
+        }
+
+        result.insert(r#type, kinds);
+    }
+
+    result
+});
+
+/// Computes the inverse of the registry.
+///
+/// In other words, maps CST elements—dynamically typed as [`SyntaxKind`]s—to
+/// the corresponding AST element(s) that can be cast from them.
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping
+/// with CST element kinds.
+fn inverse() -> HashMap<SyntaxKind, Box<[&'static str]>> {
+    let mut result = HashMap::<SyntaxKind, Vec<&'static str>>::new();
+
+    for (key, values) in REGISTRY.iter() {
+        for value in values.into_iter() {
+            result.entry(value.to_owned()).or_default().push(*key);
+        }
+    }
+
+    result
+        .into_iter()
+        .map(|(key, values)| (key, values.into_boxed_slice()))
+        .collect()
+}
+
+trait AstNodeRegistrant: private::SealedNode {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST node type.
+    fn register() -> (&'static str, Box<[SyntaxKind]>);
+}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> private::SealedNode for T {}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> AstNodeRegistrant for T {
+    fn register() -> (&'static str, Box<[SyntaxKind]>) {
+        (
+            type_name::<T>(),
+            ALL_SYNTAX_KIND
+                .iter()
+                .filter(|kind| T::can_cast(**kind))
+                .cloned()
+                .collect::<Vec<_>>()
+                .into_boxed_slice(),
+        )
+    }
+}
+
+trait AstTokenRegistrant: private::SealedToken {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST token type.
+ fn register() -> (&'static str, Box<[SyntaxKind]>); +} + +impl private::SealedToken for T {} + +impl AstTokenRegistrant for T { + fn register() -> (&'static str, Box<[SyntaxKind]>) { + ( + type_name::(), + ALL_SYNTAX_KIND + .iter() + .filter(|kind| T::can_cast(**kind)) + .cloned() + .collect::>() + .into_boxed_slice(), + ) + } +} + +mod tests { + use super::*; + + /// This test ensures there is a one-to-one mapping between CST elements + /// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement + /// the [`AstNode`] trait or the [`AstToken`] trait). + /// + /// The importance of this is described at the top of the module. + #[test] + fn ensure_one_to_one() { + let mut missing = Vec::new(); + let mut multiple = Vec::new(); + + let inverse_registry = inverse(); + + for kind in ALL_SYNTAX_KIND { + // NOTE: these are symbolic elements and should not be included in + // the analysis here. + if kind.is_symbolic() { + continue; + } + + match inverse_registry.get(kind) { + // SAFETY: because this is an inverse registry, only + // [`SyntaxKind`]s with at least one registered implementing + // type would be registered here. Thus, by design of the + // `inverse()` method, this will never occur. + Some(values) if values.is_empty() => { + unreachable!("the inverse registry should never contain an empty array") + } + Some(values) if values.len() > 1 => multiple.push((kind, values)), + None => missing.push(kind), + // NOTE: this is essentially only if the values exist and the + // length is 1—in that case, there is a one to one mapping, + // which is what we would like the case to be. + _ => {} + } + } + + if !missing.is_empty() { + let mut missing = missing + .into_iter() + .map(|kind| format!("{:?}", kind)) + .collect::>(); + missing.sort(); + + panic!( + "detected `SyntaxKind`s without an associated `AstNode`/`AstToken` (n={}): {}", + missing.len(), + missing.join(", ") + ) + } + + if !multiple.is_empty() { + multiple.sort(); + let mut multiple = multiple + .into_iter() + .map(|(kind, types)| { + let mut types = types.clone(); + types.sort(); + + let mut result = format!("== {:?} ==", kind); + for r#type in types { + result.push_str("\n* "); + result.push_str(r#type); + } + + result + }) + .collect::>(); + multiple.sort(); + + panic!( + "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s \ + (n={}):\n\n{}", + multiple.len(), + multiple.join("\n\n") + ) + } + } +} diff --git a/backup/wdl-format-old-2/src/task.rs b/backup/wdl-format-old-2/src/task.rs new file mode 100644 index 000000000..6e01b8843 --- /dev/null +++ b/backup/wdl-format-old-2/src/task.rs @@ -0,0 +1,455 @@ +//! A module for formatting elements in tasks. 
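+//!
+//! A usage sketch (illustrative only; `format_document` in `lib.rs` is the
+//! public entry point): command sections are intended to be coerced to
+//! heredoc (`<<< ... >>>`) delimiters regardless of the input syntax, per the
+//! (currently stubbed-out) `CommandSection` implementation below.
+//!
+//! ```rust,ignore
+//! let source = "version 1.2\ntask hello { command { echo \"hi\" } }\n";
+//! let formatted = wdl_format::format_document(source).expect("should format");
+//! assert!(formatted.contains("<<<") && formatted.contains(">>>"));
+//! ```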
+ +use wdl_ast::v1::CommandPart; +use wdl_ast::v1::CommandSection; +use wdl_ast::v1::CommandText; +use wdl_ast::v1::Decl; +use wdl_ast::v1::RequirementsItem; +use wdl_ast::v1::RequirementsSection; +use wdl_ast::v1::RuntimeItem; +use wdl_ast::v1::RuntimeSection; +use wdl_ast::v1::TaskDefinition; +use wdl_ast::v1::TaskItem; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; + +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::NEWLINE; + +impl Formattable for CommandText { + fn format( + &self, + writer: &mut T, + _formatter: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.as_str()) + } +} + +impl Formattable for CommandSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let command_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::CommandKeyword); formatter.indent(writer)?; + // write!(writer, "{}", command_keyword)?; + // format_inline_comment(&command_keyword, writer, formatter, true)?; + + // // coerce all command sections to use heredoc ('<<<>>>>') syntax + // // (as opposed to bracket ('{}') syntax) + // let open_section = if self.is_heredoc() { + // first_child_of_kind(self.syntax(), SyntaxKind::OpenHeredoc) + // } else { + // first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace) + // }; + // format_preceding_comments(&open_section, writer, formatter, true)?; + + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "<<<")?; + + // for part in self.parts() { + // match part { + // CommandPart::Text(t) => { + // t.format(writer, formatter)?; + // } + // CommandPart::Placeholder(p) => { + // p.format(writer, formatter)?; + // } + // } + // } + + // write!(writer, ">>>")?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RuntimeItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let name = self.name(); + // formatter.indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + // format_preceding_comments(&colon, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } + // write!(writer, "{}", colon)?; + // format_inline_comment(&colon, writer, formatter, true)?; + + // let expr = self.expr(); + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RuntimeSection { + fn 
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let runtime_keyword = first_child_of_kind(self.syntax(),
+        // SyntaxKind::RuntimeKeyword); formatter.indent(writer)?;
+        // write!(writer, "{}", runtime_keyword)?;
+        // format_inline_comment(&runtime_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for item in self.items() {
+        //     item.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, true)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for RequirementsItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let name = self.name();
+        // formatter.indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon);
+        // format_preceding_comments(&colon, writer, formatter, true)?;
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // }
+        // write!(writer, "{}", colon)?;
+        // format_inline_comment(&colon, writer, formatter, true)?;
+
+        // let expr = self.expr();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(expr.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // expr.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for RequirementsSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let requirements_keyword =
+        //     first_child_of_kind(self.syntax(), SyntaxKind::RequirementsKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", requirements_keyword)?;
+        // format_inline_comment(&requirements_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for item in self.items() {
+        //     item.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, true)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for TaskDefinition {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let task_keyword = first_child_of_kind(self.syntax(),
+        // SyntaxKind::TaskKeyword); formatter.indent(writer)?;
+        // write!(writer, "{}", task_keyword)?;
+        // format_inline_comment(&task_keyword, writer, formatter, true)?;
+
+        // let name = self.name();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // let mut meta_section_str = String::new(); + // let mut parameter_meta_section_str = String::new(); + // let mut input_section_str = String::new(); + // let mut declaration_section_str = String::new(); + // let mut command_section_str = String::new(); + // let mut output_section_str = String::new(); + // let mut runtime_section_str = String::new(); + // let mut hints_section_str = String::new(); + // let mut requirements_section_str = String::new(); + + // for item in self.items() { + // match item { + // TaskItem::Metadata(m) => { + // m.format(&mut meta_section_str, formatter)?; + // } + // TaskItem::ParameterMetadata(pm) => { + // pm.format(&mut parameter_meta_section_str, formatter)?; + // } + // TaskItem::Input(i) => { + // i.format(&mut input_section_str, formatter)?; + // } + // TaskItem::Declaration(d) => { + // Decl::Bound(d).format(&mut declaration_section_str, formatter)?; + // } + // TaskItem::Command(c) => { + // c.format(&mut command_section_str, formatter)?; + // } + // TaskItem::Output(o) => { + // o.format(&mut output_section_str, formatter)?; + // } + // TaskItem::Runtime(r) => { + // r.format(&mut runtime_section_str, formatter)?; + // } + // TaskItem::Hints(h) => { + // h.format(&mut hints_section_str, formatter)?; + // } + // TaskItem::Requirements(r) => { + // r.format(&mut requirements_section_str, formatter)?; + // } + // } + // } + + // let mut first_section = true; + + // if !meta_section_str.is_empty() { + // first_section = false; + // write!(writer, "{}", meta_section_str)?; + // } + // if !parameter_meta_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", parameter_meta_section_str)?; + // } + // if !input_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", input_section_str)?; + // } + // if !declaration_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", declaration_section_str)?; + // } + // // Command section is required + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", command_section_str)?; + // if !output_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", output_section_str)?; + // } + // if !runtime_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", runtime_section_str)?; + // } + // if !hints_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", hints_section_str)?; + // } + // if !requirements_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", requirements_section_str)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, true)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} 
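+
+// A minimal, self-contained sketch of the section-buffering strategy that the
+// commented-out `TaskDefinition` implementation above describes: each section
+// is rendered into its own `String`, and the non-empty buffers are then
+// emitted in canonical order (meta, parameter_meta, input, declarations,
+// command, output, runtime, hints, requirements), with a blank line between
+// consecutive sections. The helper and test below are illustrative only and
+// are not part of the original module.
+#[cfg(test)]
+mod section_order_sketch {
+    /// Joins pre-rendered, canonically ordered section buffers, skipping any
+    /// empty buffers and separating the remainder with a blank line.
+    fn emit_sections(sections: &[String]) -> String {
+        let mut out = String::new();
+        for section in sections.iter().filter(|s| !s.is_empty()) {
+            if !out.is_empty() {
+                out.push('\n');
+            }
+            out.push_str(section);
+        }
+        out
+    }
+
+    #[test]
+    fn skips_empty_sections() {
+        let sections = vec![
+            "meta {}\n".to_string(),
+            String::new(), // e.g. no parameter_meta section was present
+            "command <<<>>>\n".to_string(),
+        ];
+        assert_eq!(emit_sections(&sections), "meta {}\n\ncommand <<<>>>\n");
+    }
+}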
diff --git a/backup/wdl-format-old-2/src/token.rs b/backup/wdl-format-old-2/src/token.rs
new file mode 100644
index 000000000..e87111c36
--- /dev/null
+++ b/backup/wdl-format-old-2/src/token.rs
@@ -0,0 +1,82 @@
+use std::borrow::Cow;
+
+use crate::Writable;
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum Token<'a> {
+    Indent,
+    Dedent,
+    Literal(Cow<'a, str>),
+}
+
+impl<'a> From<&'a str> for Token<'a> {
+    fn from(value: &'a str) -> Self {
+        Token::Literal(Cow::Borrowed(value))
+    }
+}
+
+impl From<String> for Token<'_> {
+    fn from(value: String) -> Self {
+        Token::Literal(Cow::Owned(value))
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct TokenStream<'a>(pub(crate) Vec<Token<'a>>);
+
+impl<'a> TokenStream<'a> {
+    pub fn indent(&mut self) {
+        self.0.push(Token::Indent);
+    }
+
+    pub fn dedent(&mut self) {
+        self.0.push(Token::Dedent);
+    }
+
+    pub fn write<W: Writable + 'a>(&mut self, value: W) {
+        value.write(self);
+    }
+
+    pub fn indented<F: FnMut(&mut Self)>(&mut self, mut f: F) {
+        // Indents the block.
+        self.indent();
+
+        // Runs the inner function.
+        f(self);
+
+        // Dedents the block.
+        self.dedent();
+    }
+
+    pub fn inner(&self) -> &Vec<Token<'a>> {
+        &self.0
+    }
+
+    pub fn into_inner(self) -> Vec<Token<'a>> {
+        self.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use super::*;
+
+    #[test]
+    fn smoke() {
+        let mut stream = TokenStream::default();
+        stream.indented(|stream| {
+            stream.write("Hello, world!");
+        });
+
+        assert_eq!(
+            stream.into_inner(),
+            vec![
+                Token::Indent,
+                Token::Literal(Cow::Owned("Hello, world!".to_string())),
+                Token::Dedent
+            ]
+        )
+    }
+}
diff --git a/backup/wdl-format-old-2/src/tokens.rs b/backup/wdl-format-old-2/src/tokens.rs
new file mode 100644
index 000000000..e3c525cac
--- /dev/null
+++ b/backup/wdl-format-old-2/src/tokens.rs
@@ -0,0 +1,2089 @@
+//! V1 AST tokens.
+
+use crate::AstToken;
+use crate::SyntaxKind;
+use crate::SyntaxToken;
+
+/// A token representing the `after` keyword.
+#[derive(Debug)]
+pub struct AfterKeyword(SyntaxToken);
+
+impl AstToken for AfterKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::AfterKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for AfterKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "after")
+    }
+}
+
+/// A token representing the `alias` keyword.
+#[derive(Debug)]
+pub struct AliasKeyword(SyntaxToken);
+
+impl AstToken for AliasKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::AliasKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for AliasKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "alias")
+    }
+}
+
+/// A token representing the `Array` type keyword.
+#[derive(Debug)] +pub struct ArrayTypeKeyword(SyntaxToken); + +impl AstToken for ArrayTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ArrayTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ArrayTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Array") + } +} + +/// A token representing the `as` keyword. +#[derive(Debug)] +pub struct AsKeyword(SyntaxToken); + +impl AstToken for AsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "as") + } +} + +/// A token representing the `=` symbol. +#[derive(Debug)] +pub struct Assignment(SyntaxToken); + +impl AstToken for Assignment { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Assignment) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Assignment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "=") + } +} + +/// A token representing the `*` symbol. +#[derive(Debug)] +pub struct Asterisk(SyntaxToken); + +impl AstToken for Asterisk { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Asterisk) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Asterisk { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "*") + } +} + +/// A token representing the `Boolean` keyword. +#[derive(Debug)] +pub struct BooleanTypeKeyword(SyntaxToken); + +impl AstToken for BooleanTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::BooleanTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for BooleanTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Boolean") + } +} + +/// A token representing the `call` keyword. +#[derive(Debug)] +pub struct CallKeyword(SyntaxToken); + +impl AstToken for CallKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CallKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CallKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "call") + } +} + +/// A token representing the `}` symbol. 
+#[derive(Debug)] +pub struct CloseBrace(SyntaxToken); + +impl AstToken for CloseBrace { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBrace) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "}}") + } +} + +/// A token representing the `]` symbol. +#[derive(Debug)] +pub struct CloseBracket(SyntaxToken); + +impl AstToken for CloseBracket { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBracket) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBracket { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "]") + } +} + +/// A token representing the `>>>` token. +#[derive(Debug)] +pub struct CloseHeredoc(SyntaxToken); + +impl AstToken for CloseHeredoc { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseHeredoc) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseHeredoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">>>") + } +} + +/// A token representing the `)` symbol. +#[derive(Debug)] +pub struct CloseParen(SyntaxToken); + +impl AstToken for CloseParen { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseParen) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseParen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ")") + } +} + +/// A token representing the `:` symbol. +#[derive(Debug)] +pub struct Colon(SyntaxToken); + +impl AstToken for Colon { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Colon) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Colon { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ":") + } +} + +/// A token representing the `,` symbol. +#[derive(Debug)] +pub struct Comma(SyntaxToken); + +impl AstToken for Comma { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Comma) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Comma { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ",") + } +} + +/// A token representing the `command` keyword. 
+#[derive(Debug)] +pub struct CommandKeyword(SyntaxToken); + +impl AstToken for CommandKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CommandKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CommandKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "command") + } +} + +/// A token representing the `Directory` type keyword. +#[derive(Debug)] +pub struct DirectoryTypeKeyword(SyntaxToken); + +impl AstToken for DirectoryTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::DirectoryTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for DirectoryTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Directory") + } +} + +/// A token representing the `.` symbol. +#[derive(Debug)] +pub struct Dot(SyntaxToken); + +impl AstToken for Dot { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Dot) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Dot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ".") + } +} + +/// A token representing the `"` symbol. +#[derive(Debug)] +pub struct DoubleQuote(SyntaxToken); + +impl AstToken for DoubleQuote { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::DoubleQuote) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for DoubleQuote { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, r#"""#) + } +} + +/// A token representing the `else` keyword. +#[derive(Debug)] +pub struct ElseKeyword(SyntaxToken); + +impl AstToken for ElseKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ElseKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ElseKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "else") + } +} + +/// A token representing the `==` symbol. +#[derive(Debug)] +pub struct Equal(SyntaxToken); + +impl AstToken for Equal { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Equal) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Equal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "=") + } +} + +/// A token representing the `!` symbol. 
+#[derive(Debug)] +pub struct Exclamation(SyntaxToken); + +impl AstToken for Exclamation { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Exclamation) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Exclamation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!") + } +} + +/// A token representing the `**` keyword. +#[derive(Debug)] +pub struct Exponentiation(SyntaxToken); + +impl AstToken for Exponentiation { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Exponentiation) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Exponentiation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "**") + } +} + +/// A token representing the `false` keyword. +#[derive(Debug)] +pub struct FalseKeyword(SyntaxToken); + +impl AstToken for FalseKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FalseKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FalseKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "false") + } +} + +/// A token representing the `File` type keyword. +#[derive(Debug)] +pub struct FileTypeKeyword(SyntaxToken); + +impl AstToken for FileTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FileTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FileTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "File") + } +} + +/// A token representing the `Float` type keyword. +#[derive(Debug)] +pub struct FloatTypeKeyword(SyntaxToken); + +impl AstToken for FloatTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FloatTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FloatTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Float") + } +} + +/// A token representing the `>` symbol. +#[derive(Debug)] +pub struct Greater(SyntaxToken); + +impl AstToken for Greater { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Greater) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Greater { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">") + } +} + +/// A token representing the `>=` symbol. 
+#[derive(Debug)] +pub struct GreaterEqual(SyntaxToken); + +impl AstToken for GreaterEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::GreaterEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for GreaterEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">=") + } +} + +/// A token representing the `hints` keyword. +#[derive(Debug)] +pub struct HintsKeyword(SyntaxToken); + +impl AstToken for HintsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::HintsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for HintsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "hints") + } +} + +/// A token representing the `if` keyword. +#[derive(Debug)] +pub struct IfKeyword(SyntaxToken); + +impl AstToken for IfKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IfKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IfKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "if") + } +} + +/// A token representing the `import` keyword. +#[derive(Debug)] +pub struct ImportKeyword(SyntaxToken); + +impl AstToken for ImportKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ImportKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ImportKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "import") + } +} + +/// A token representing the `in` keyword. +#[derive(Debug)] +pub struct InKeyword(SyntaxToken); + +impl AstToken for InKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "in") + } +} + +/// A token representing the `input` keyword. +#[derive(Debug)] +pub struct InputKeyword(SyntaxToken); + +impl AstToken for InputKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InputKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InputKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "input") + } +} + +/// A token representing the `Int` type keyword. 
+#[derive(Debug)] +pub struct IntTypeKeyword(SyntaxToken); + +impl AstToken for IntTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IntTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IntTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Int") + } +} + +/// A token representing the `<` symbol. +#[derive(Debug)] +pub struct Less(SyntaxToken); + +impl AstToken for Less { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Less) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Less { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<") + } +} + +/// A token representing the `<=` symbol. +#[derive(Debug)] +pub struct LessEqual(SyntaxToken); + +impl AstToken for LessEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LessEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LessEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<=") + } +} + +/// A token representing the `&&` symbol. +#[derive(Debug)] +pub struct LogicalAnd(SyntaxToken); + +impl AstToken for LogicalAnd { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalAnd) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalAnd { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "&&") + } +} + +/// A token representing the `||` symbol. +#[derive(Debug)] +pub struct LogicalOr(SyntaxToken); + +impl AstToken for LogicalOr { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalOr) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalOr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "||") + } +} + +/// A token representing the `Map` type keyword. +#[derive(Debug)] +pub struct MapTypeKeyword(SyntaxToken); + +impl AstToken for MapTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MapTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MapTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Map") + } +} + +/// A token representing the `meta` keyword. 
+#[derive(Debug)] +pub struct MetaKeyword(SyntaxToken); + +impl AstToken for MetaKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MetaKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MetaKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "meta") + } +} + +/// A token representing the `-` symbol. +#[derive(Debug)] +pub struct Minus(SyntaxToken); + +impl AstToken for Minus { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Minus) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Minus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "-") + } +} + +/// A token representing the `None` keyword. +#[derive(Debug)] +pub struct NoneKeyword(SyntaxToken); + +impl AstToken for NoneKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NoneKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NoneKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "None") + } +} + +/// A token representing the `!=` symbol. +#[derive(Debug)] +pub struct NotEqual(SyntaxToken); + +impl AstToken for NotEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NotEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NotEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!=") + } +} + +/// A token representing the `null` keyword. +#[derive(Debug)] +pub struct NullKeyword(SyntaxToken); + +impl AstToken for NullKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NullKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NullKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "null") + } +} + +/// A token representing the `object` keyword. +#[derive(Debug)] +pub struct ObjectKeyword(SyntaxToken); + +impl AstToken for ObjectKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ObjectKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ObjectKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "object") + } +} + +/// A token representing the `Object` type keyword. 
+#[derive(Debug)] +pub struct ObjectTypeKeyword(SyntaxToken); + +impl AstToken for ObjectTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ObjectTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ObjectTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Object") + } +} + +/// A token representing the `{` symbol. +#[derive(Debug)] +pub struct OpenBrace(SyntaxToken); + +impl AstToken for OpenBrace { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::OpenBrace) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for OpenBrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{") + } +} + +/// A token representing the `[` symbol. +#[derive(Debug)] +pub struct OpenBracket(SyntaxToken); + +impl AstToken for OpenBracket { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::OpenBracket) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for OpenBracket { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[") + } +} + +/// A token representing the `<<<` symbol. +#[derive(Debug)] +pub struct OpenHeredoc(SyntaxToken); + +impl AstToken for OpenHeredoc { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::OpenHeredoc) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for OpenHeredoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<<<") + } +} + +/// A token representing the `(` keyword. +#[derive(Debug)] +pub struct OpenParen(SyntaxToken); + +impl AstToken for OpenParen { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::OpenParen) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for OpenParen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "(") + } +} + +/// A token representing the `output` keyword. +#[derive(Debug)] +pub struct OutputKeyword(SyntaxToken); + +impl AstToken for OutputKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::OutputKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for OutputKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "output") + } +} + +/// A token representing the `Pair` type keyword. 
+#[derive(Debug)] +pub struct PairTypeKeyword(SyntaxToken); + +impl AstToken for PairTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::PairTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for PairTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Pair") + } +} + +/// A token representing the `parameter_meta` keyword. +#[derive(Debug)] +pub struct ParameterMetaKeyword(SyntaxToken); + +impl AstToken for ParameterMetaKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ParameterMetaKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ParameterMetaKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "parameter_meta") + } +} + +/// A token representing the `%` symbol. +#[derive(Debug)] +pub struct Percent(SyntaxToken); + +impl AstToken for Percent { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Percent) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Percent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "%") + } +} + +/// Represents one of the placeholder open symbols. +#[derive(Debug)] +pub struct PlaceholderOpen(SyntaxToken); + +impl AstToken for PlaceholderOpen { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::PlaceholderOpen) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for PlaceholderOpen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // NOTE: this is deferred to the entire underlying string simply because + // we cannot known a priori what the captured text is. + write!(f, "{}", self.0) + } +} + +/// A token representing the `+` symbol. +#[derive(Debug)] +pub struct Plus(SyntaxToken); + +impl AstToken for Plus { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Plus) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Plus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "+") + } +} + +/// A token representing the `?` symbol. 
+#[derive(Debug)] +pub struct QuestionMark(SyntaxToken); + +impl AstToken for QuestionMark { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::QuestionMark) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for QuestionMark { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "?") + } +} + +/// A token representing the `requirements` keyword. +#[derive(Debug)] +pub struct RequirementsKeyword(SyntaxToken); + +impl AstToken for RequirementsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RequirementsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RequirementsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "requirements") + } +} + +/// A token representing the `runtime` keyword. +#[derive(Debug)] +pub struct RuntimeKeyword(SyntaxToken); + +impl AstToken for RuntimeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RuntimeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RuntimeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "runtime") + } +} + +/// A token representing the `scatter` keyword. +#[derive(Debug)] +pub struct ScatterKeyword(SyntaxToken); + +impl AstToken for ScatterKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ScatterKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ScatterKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "scatter") + } +} + +/// A token representing the `'` symbol. +#[derive(Debug)] +pub struct SingleQuote(SyntaxToken); + +impl AstToken for SingleQuote { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::SingleQuote) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for SingleQuote { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "'") + } +} + +/// A token representing the `/` symbol. +#[derive(Debug)] +pub struct Slash(SyntaxToken); + +impl AstToken for Slash { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Slash) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Slash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "/") + } +} + +/// A token representing the `String` type keyword. 
+#[derive(Debug)] +pub struct StringTypeKeyword(SyntaxToken); + +impl AstToken for StringTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StringTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StringTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "String") + } +} + +/// A token representing the `struct` keyword. +#[derive(Debug)] +pub struct StructKeyword(SyntaxToken); + +impl AstToken for StructKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StructKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StructKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "struct") + } +} + +/// A token representing the `task` keyword. +#[derive(Debug)] +pub struct TaskKeyword(SyntaxToken); + +impl AstToken for TaskKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TaskKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TaskKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "task") + } +} + +/// A token representing the `then` keyword. +#[derive(Debug)] +pub struct ThenKeyword(SyntaxToken); + +impl AstToken for ThenKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ThenKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ThenKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "then") + } +} + +/// A token representing the `true` keyword. +#[derive(Debug)] +pub struct TrueKeyword(SyntaxToken); + +impl AstToken for TrueKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TrueKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TrueKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "true") + } +} + +/// A token representing unknown contents within a WDL document. 
+#[derive(Debug)] +pub struct Unknown(SyntaxToken); + +impl AstToken for Unknown { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Unknown) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Unknown { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // NOTE: this is deferred to the entire underlying string simply because + // we cannot known a priori what the captured text is. + write!(f, "{}", self.0) + } +} + +/// A token representing the `version` keyword. +#[derive(Debug)] +pub struct VersionKeyword(SyntaxToken); + +impl AstToken for VersionKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::VersionKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for VersionKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "version") + } +} + +/// A token representing the `workflow` keyword. +#[derive(Debug)] +pub struct WorkflowKeyword(SyntaxToken); + +impl AstToken for WorkflowKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::WorkflowKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for WorkflowKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "workflow") + } +} diff --git a/backup/wdl-format-old-2/src/v1.rs b/backup/wdl-format-old-2/src/v1.rs new file mode 100644 index 000000000..561a58ebc --- /dev/null +++ b/backup/wdl-format-old-2/src/v1.rs @@ -0,0 +1,711 @@ +//! A module for formatting WDL v1 elements. 
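+//!
+//! During formatting, placeholders are written back in the `~{...}` form
+//! regardless of whether the source used `~{...}` or `${...}`. A minimal
+//! sketch of that coercion (the helper below is illustrative and not part of
+//! this module):
+//!
+//! ```ignore
+//! fn coerce_placeholder(expr_text: &str) -> String {
+//!     // Always emit the `~{` opener and `}` closer; the expression text
+//!     // between them is unchanged.
+//!     format!("~{{{}}}", expr_text)
+//! }
+//! assert_eq!(coerce_placeholder("name"), "~{name}");
+//! ```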
+ +use std::fmt::Write; + +use wdl_ast::v1::Decl; +use wdl_ast::v1::DefaultOption; +use wdl_ast::v1::DocumentItem; +use wdl_ast::v1::Expr; +use wdl_ast::v1::HintsItem; +use wdl_ast::v1::HintsSection; +use wdl_ast::v1::InputSection; +use wdl_ast::v1::LiteralBoolean; +use wdl_ast::v1::LiteralFloat; +use wdl_ast::v1::LiteralInteger; +use wdl_ast::v1::LiteralString; +use wdl_ast::v1::OutputSection; +use wdl_ast::v1::Placeholder; +use wdl_ast::v1::PlaceholderOption; +use wdl_ast::v1::SepOption; +use wdl_ast::v1::StringPart; +use wdl_ast::v1::StringText; +use wdl_ast::v1::StructDefinition; +use wdl_ast::v1::StructKeyword; +use wdl_ast::v1::TrueFalseOption; +use wdl_ast::v1::Type; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; +use wdl_grammar::SyntaxExt; + +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::NEWLINE; +use super::STRING_TERMINATOR; + +impl Formattable for DefaultOption { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // let default_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); + // format_preceding_comments(&default_word, writer, formatter, true)?; + // write!(writer, "{}", default_word)?; + // format_inline_comment(&default_word, writer, formatter, true)?; + + // let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment); + // format_preceding_comments(&assignment, writer, formatter, true)?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", assignment)?; + // format_inline_comment(&assignment, writer, formatter, true)?; + + // let value = self.value(); + // format_preceding_comments( + // &SyntaxElement::from(value.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // value.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(value.syntax().clone()), + // writer, + // formatter, + // true, + // ) + Ok(()) + } +} + +impl Formattable for SepOption { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // let sep_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); + // format_preceding_comments(&sep_word, writer, formatter, true)?; + // write!(writer, "{}", sep_word)?; + // format_inline_comment(&sep_word, writer, formatter, true)?; + + // let assignment = first_child_of_kind(self.syntax(), + // SyntaxKind::Assignment); format_preceding_comments(&assignment, + // writer, formatter, true)?; formatter.space_or_indent(writer)?; + // write!(writer, "{}", assignment)?; + // format_inline_comment(&assignment, writer, formatter, true)?; + + // let separator = self.separator(); + // format_preceding_comments( + // &SyntaxElement::from(separator.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // separator.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(separator.syntax().clone()), + // writer, + // formatter, + // true, + // ) + Ok(()) + } +} + +impl Formattable for TrueFalseOption { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // let mut true_clause = String::new(); + // let mut false_clause = String::new(); + // let mut which_clause = None; + // for child in self.syntax().children_with_tokens() { + // match child.kind() { + // SyntaxKind::TrueKeyword => { + // which_clause = Some(true); + + // 
format_preceding_comments(&child, &mut true_clause, formatter, + // true)?; write!(true_clause, "{}", child)?; + // format_inline_comment(&child, &mut true_clause, formatter, + // true)?; } + // SyntaxKind::FalseKeyword => { + // which_clause = Some(false); + + // format_preceding_comments(&child, &mut false_clause, formatter, + // true)?; write!(false_clause, "{}", child)?; + // format_inline_comment(&child, &mut false_clause, formatter, + // true)?; } + // SyntaxKind::Assignment => { + // let cur_clause = match which_clause { + // Some(true) => &mut true_clause, + // Some(false) => &mut false_clause, + // _ => unreachable!( + // "should have found a true or false keyword before an + // assignment" ), + // }; + + // format_preceding_comments(&child, cur_clause, formatter, true)?; + // formatter.space_or_indent(cur_clause)?; + // write!(cur_clause, "{}", child)?; + // format_inline_comment(&child, cur_clause, formatter, true)?; + // } + // SyntaxKind::LiteralStringNode => { + // let cur_clause = match which_clause { + // Some(true) => &mut true_clause, + // Some(false) => &mut false_clause, + // _ => unreachable!( + // "should have found a true or false keyword before a + // string" ), + // }; + + // format_preceding_comments(&child, cur_clause, formatter, true)?; + // formatter.space_or_indent(cur_clause)?; + // let literal_string = LiteralString::cast( + // child + // .as_node() + // .expect("LiteralStringNode should be a node") + // .clone(), + // ) + // .expect("LiteralStringNode should cast to a LiteralString"); + // literal_string.format(cur_clause, formatter)?; + // format_inline_comment(&child, writer, formatter, true)?; + // } + // SyntaxKind::Whitespace => { + // // Ignore + // } + // SyntaxKind::Comment => { + // // Handled by a call to `format_preceding_comments` + // // or `format_inline_comment` in another match arm. 
+        //         }
+        //         _ => {
+        //             unreachable!("Unexpected syntax kind: {:?}", child.kind());
+        //         }
+        //     }
+        // }
+        // write!(writer, "{} {}", true_clause, false_clause)?;
+
+        Ok(())
+    }
+}
+
+impl Formattable for PlaceholderOption {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        match self {
+            PlaceholderOption::Default(default) => default.format(writer, formatter),
+            PlaceholderOption::Sep(sep) => sep.format(writer, formatter),
+            PlaceholderOption::TrueFalse(true_false) => true_false.format(writer, formatter),
+        }
+    }
+}
+
+impl Formattable for Placeholder {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // coerce all placeholders into '~{}' placeholders
+        // (as opposed to '${}' placeholders)
+        write!(writer, "~{{")?;
+
+        let mut option_present = false;
+        if let Some(option) = self.options().next() {
+            option.format(writer, formatter)?;
+            option_present = true;
+        }
+
+        let expr = self.expr();
+        if option_present {
+            formatter.space_or_indent(writer)?;
+        }
+        expr.format(writer, formatter)?;
+
+        write!(writer, "}}")
+    }
+}
+
+impl Formattable for StringText {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        let mut iter = self.as_str().chars().peekable();
+        let mut prev_c = None;
+        while let Some(c) = iter.next() {
+            match c {
+                '\\' => {
+                    if let Some(next_c) = iter.peek() {
+                        if *next_c == '\'' {
+                            // Do not write this backslash
+                            prev_c = Some(c);
+                            continue;
+                        }
+                    }
+                    writer.write_char(c)?;
+                }
+                '"' => {
+                    if let Some(pc) = prev_c {
+                        if pc != '\\' {
+                            writer.write_char('\\')?;
+                        }
+                    }
+                    writer.write_char(c)?;
+                }
+                _ => {
+                    writer.write_char(c)?;
+                }
+            }
+            prev_c = Some(c);
+        }
+
+        Ok(())
+    }
+}
+
+impl Formattable for LiteralString {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", STRING_TERMINATOR)?;
+        for part in self.parts() {
+            match part {
+                StringPart::Text(text) => {
+                    text.format(writer, formatter)?;
+                }
+                StringPart::Placeholder(placeholder) => {
+                    placeholder.format(writer, formatter)?;
+                }
+            }
+        }
+        write!(writer, "{}", STRING_TERMINATOR)
+    }
+}
+
+impl Formattable for LiteralBoolean {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.value()) // TODO
+    }
+}
+
+impl Formattable for LiteralFloat {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for LiteralInteger {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Type {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Expr {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Decl {
+    fn format<T: Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let ty = self.ty();
+        // formatter.indent(writer)?;
+        // ty.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(ty.syntax().clone()),
+        //     writer,
+
+impl Formattable for Decl {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let ty = self.ty();
+        // formatter.indent(writer)?;
+        // ty.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(ty.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let name = self.name();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // if let Some(expr) = self.expr() {
+        //     let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment);
+        //     format_preceding_comments(&assignment, writer, formatter, true)?;
+        //     formatter.space_or_indent(writer)?;
+        //     write!(writer, "{}", assignment)?;
+        //     format_inline_comment(&assignment, writer, formatter, true)?;
+
+        //     format_preceding_comments(
+        //         &SyntaxElement::from(expr.syntax().clone()),
+        //         writer,
+        //         formatter,
+        //         true,
+        //     )?;
+        //     formatter.space_or_indent(writer)?;
+        //     expr.format(writer, formatter)?;
+        // }
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for InputSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let input_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InputKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", input_keyword)?;
+        // format_inline_comment(&input_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for decl in self.declarations() {
+        //     decl.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for OutputSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let output_keyword = first_child_of_kind(self.syntax(), SyntaxKind::OutputKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", output_keyword)?;
+        // format_inline_comment(&output_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for decl in self.declarations() {
+        //     Decl::Bound(decl).format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for HintsItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let name = self.name();
+        // formatter.indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon);
+        // format_preceding_comments(&colon, writer, formatter, true)?;
+        // if formatter.interrupted() {
+        //     formatter.indent(writer)?;
+        // }
+        // write!(writer, "{}", colon)?;
+        // format_inline_comment(&colon, writer, formatter, true)?;
+
+        // let expr = self.expr();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(expr.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // expr.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for HintsSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let hints_keyword = first_child_of_kind(self.syntax(), SyntaxKind::HintsKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", hints_keyword)?;
+        // format_inline_comment(&hints_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for item in self.items() {
+        //     item.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for StructKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for StructDefinition {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), false, true)?;
+
+        let struct_keyword = self.keyword();
+        struct_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, struct_keyword.syntax().inline_comment(), true)?;
+
+        let name = self.name();
+        formatter.format_preceding_trivia(writer, name.syntax().preceding_trivia(), true, false)?;
+        formatter.space_or_indent(writer)?;
+        name.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, name.syntax().inline_comment(), true)?;
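+
+        // NOTE: what follows is the older `SyntaxElement`-based formatting
+        // path for the rest of the struct definition (braces, metadata, and
+        // members). It is commented out alongside the trivia-based calls
+        // above, and until it is ported the body below falls through to
+        // `Ok(())`.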
+        // formatter.space_or_indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // if let Some(m) = self.metadata().next() {
+        //     m.format(writer, formatter)?;
+        //     write!(writer, "{}", NEWLINE)?;
+        // }
+
+        // if let Some(pm) = self.parameter_metadata().next() {
+        //     pm.format(writer, formatter)?;
+        //     write!(writer, "{}", NEWLINE)?;
+        // }
+
+        // for decl in self.members() {
+        //     Decl::Unbound(decl).format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = self
+        //     .syntax()
+        //     .children_with_tokens()
+        //     .find(|element| element.kind() == SyntaxKind::CloseBrace)
+        //     .expect("StructDefinition should have a close brace");
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for DocumentItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        match self {
+            DocumentItem::Import(_) => {
+                unreachable!("Import statements should not be formatted as a DocumentItem")
+            }
+            DocumentItem::Workflow(workflow) => workflow.format(writer, formatter),
+            DocumentItem::Task(task) => task.format(writer, formatter),
+            DocumentItem::Struct(structure) => structure.format(writer, formatter),
+        }
+    }
+}
diff --git a/backup/wdl-format-old-2/src/workflow.rs b/backup/wdl-format-old-2/src/workflow.rs
new file mode 100644
index 000000000..f82e57003
--- /dev/null
+++ b/backup/wdl-format-old-2/src/workflow.rs
@@ -0,0 +1,666 @@
+//! A module for formatting elements in workflows.
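+//!
+//! Each workflow node type implements [`Formattable`], which writes the node
+//! back out through a shared [`Formatter`]. Callers normally go through the
+//! crate's `format_document` entry point rather than these impls directly;
+//! a rough sketch (not a doctest; how the `Formatter` is constructed is
+//! elided here):
+//!
+//! ```ignore
+//! let mut buffer = String::new();
+//! // `workflow` is a `wdl_ast::v1::WorkflowDefinition`; `formatter` is a
+//! // `Formatter` obtained from the surrounding formatting run.
+//! workflow.format(&mut buffer, &mut formatter)?;
+//! ```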
+ +use wdl_ast::v1::CallAfter; +use wdl_ast::v1::CallAlias; +use wdl_ast::v1::CallInputItem; +use wdl_ast::v1::CallStatement; +use wdl_ast::v1::ConditionalStatement; +use wdl_ast::v1::Decl; +use wdl_ast::v1::ScatterStatement; +use wdl_ast::v1::WorkflowDefinition; +use wdl_ast::v1::WorkflowItem; +use wdl_ast::v1::WorkflowStatement; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; + +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::NEWLINE; + +impl Formattable for CallAlias { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let as_keyword = first_child_of_kind(self.syntax(), SyntaxKind::AsKeyword); + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", as_keyword)?; + // format_inline_comment(&as_keyword, writer, formatter, true)?; + + // let ident = self.name(); + // format_preceding_comments( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // ident.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // true, + // ) + Ok(()) + } +} + +impl Formattable for CallAfter { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let after_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::AfterKeyword); formatter.space_or_indent(writer)?; + // write!(writer, "{}", after_keyword)?; + // format_inline_comment(&after_keyword, writer, formatter, true)?; + + // let ident = self.name(); + // format_preceding_comments( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // ident.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // true, + // ) + Ok(()) + } +} + +impl Formattable for CallInputItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // let name = self.name(); + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // if let Some(expr) = self.expr() { + // let assignment = first_child_of_kind(self.syntax(), + // SyntaxKind::Assignment); format_preceding_comments(&assignment, + // writer, formatter, true)?; formatter.space_or_indent(writer)?; + // write!(writer, "{}", assignment)?; + // format_inline_comment(&assignment, writer, formatter, true)?; + + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // } + + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // true, + // ) + Ok(()) + } +} + +impl Formattable for CallStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), 
+ // writer, + // formatter, + // false, + // )?; + + // let call_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::CallKeyword); formatter.indent(writer)?; + // write!(writer, "{}", call_keyword)?; + // format_inline_comment(&call_keyword, writer, formatter, true)?; + + // let target = self.target(); + // format_preceding_comments( + // &SyntaxElement::Node(target.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", target.syntax())?; + // format_inline_comment( + // &SyntaxElement::Node(target.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // if let Some(alias) = self.alias() { + // alias.format(writer, formatter)?; + // } + + // for after in self.after() { + // after.format(writer, formatter)?; + // } + + // let inputs: Vec<_> = self.inputs().collect(); + // if !inputs.is_empty() { + // let open_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::OpenBrace); format_preceding_comments(&open_brace, + // writer, formatter, true)?; // Open braces should ignore the "+1 + // rule" followed by other interrupted // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, true)?; + + // // TODO consider detecting if document is >= v1.2 and forcing the + // optional input // syntax + // if let Some(input_keyword) = self + // .syntax() + // .children_with_tokens() + // .find(|c| c.kind() == SyntaxKind::InputKeyword) + // { + // format_preceding_comments(&input_keyword, writer, formatter, true)?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", input_keyword)?; + // format_inline_comment(&input_keyword, writer, formatter, true)?; + + // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + // format_preceding_comments(&colon, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", colon)?; + // format_inline_comment(&colon, writer, formatter, true)?; + // } // else v1.2 syntax + + // if inputs.len() == 1 { + // let input = inputs.first().expect("inputs should have a first + // element"); format_preceding_comments( + // &SyntaxElement::from(input.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // input.format(writer, formatter)?; + // // TODO there may be a trailing comma with comments attached to it + + // let close_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::CloseBrace); format_preceding_comments(& + // close_brace, writer, formatter, true)?; formatter. 
+ // space_or_indent(writer)?; write!(writer, "{}", close_brace)?; + // } else { + // // multiple inputs + // let mut commas = self + // .syntax() + // .children_with_tokens() + // .filter(|c| c.kind() == SyntaxKind::Comma); + + // formatter.increment_indent(); + + // for input in inputs { + // if !formatter.interrupted() { + // write!(writer, "{}", NEWLINE)?; + // } else { + // formatter.reset_interrupted(); + // } + // format_preceding_comments( + // &SyntaxElement::from(input.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + // formatter.indent(writer)?; + // input.format(writer, formatter)?; + // if let Some(cur_comma) = commas.next() { + // format_preceding_comments(&cur_comma, writer, formatter, + // true)?; write!(writer, ",")?; + // format_inline_comment(&cur_comma, writer, formatter, true)?; + // } else { + // write!(writer, ",")?; + // } + // } + // if !formatter.interrupted() { + // write!(writer, "{}", NEWLINE)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::CloseBrace); format_preceding_comments(& + // close_brace, writer, formatter, false)?; formatter. + // indent(writer)?; write!(writer, "{}", close_brace)?; + // } + // } + + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for ConditionalStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let if_keyword = first_child_of_kind(self.syntax(), SyntaxKind::IfKeyword); + // formatter.indent(writer)?; + // write!(writer, "{}", if_keyword)?; + // format_inline_comment(&if_keyword, writer, formatter, true)?; + + // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); + // format_preceding_comments(&open_paren, writer, formatter, true)?; + // // Open parens should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_paren)?; + + // let mut paren_on_same_line = true; + // let expr = self.expr(); + // // PERF: This calls `to_string()` which is also called later by `format()` + // // There should be a way to avoid this. 
+ // let multiline_expr = expr.syntax().to_string().contains(NEWLINE); + + // format_inline_comment(&open_paren, writer, formatter, !multiline_expr)?; + // if multiline_expr { + // formatter.increment_indent(); + // paren_on_same_line = false; + // } + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // !multiline_expr, + // )?; + // if formatter.interrupted() || multiline_expr { + // formatter.indent(writer)?; + // paren_on_same_line = false; + // } + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // !multiline_expr, + // )?; + // if formatter.interrupted() { + // paren_on_same_line = false; + // } + + // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); + // format_preceding_comments(&close_paren, writer, formatter, !multiline_expr)?; + // if formatter.interrupted() || !paren_on_same_line { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", close_paren)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for stmt in self.statements() { + // stmt.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for ScatterStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let scatter_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::ScatterKeyword); formatter.indent(writer)?; + // write!(writer, "{}", scatter_keyword)?; + // format_inline_comment(&scatter_keyword, writer, formatter, true)?; + + // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); + // format_preceding_comments(&open_paren, writer, formatter, true)?; + // // Open parens should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_paren)?; + // format_inline_comment(&open_paren, writer, formatter, true)?; + + // let ident = self.variable(); + // format_preceding_comments( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // ident.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let in_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InKeyword); + // format_preceding_comments(&in_keyword, writer, formatter, true)?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", in_keyword)?; + // format_inline_comment(&in_keyword, writer, formatter, true)?; + + // let expr = self.expr(); + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); + // format_preceding_comments(&close_paren, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", close_paren)?; + // format_inline_comment(&close_paren, writer, formatter, true)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for stmt in self.statements() { + // stmt.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for WorkflowStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + match self { + WorkflowStatement::Call(c) => c.format(writer, formatter), + WorkflowStatement::Conditional(c) => c.format(writer, formatter), + WorkflowStatement::Scatter(s) => s.format(writer, formatter), + WorkflowStatement::Declaration(d) => Decl::Bound(d.clone()).format(writer, formatter), + } + } +} + +impl Formattable for WorkflowDefinition { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let workflow_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::WorkflowKeyword); write!(writer, "{}", + // workflow_keyword)?; format_inline_comment(&workflow_keyword, writer, + // formatter, true)?; + + // let name = self.name(); + // format_preceding_comments( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // let mut meta_section_str = String::new(); + // let mut parameter_meta_section_str = String::new(); + // let mut input_section_str = String::new(); + // let mut body_str = String::new(); + // let mut output_section_str = String::new(); + // let mut hints_section_str = String::new(); + + // for item in self.items() { + // match item { + // WorkflowItem::Metadata(m) => { + // m.format(&mut meta_section_str, formatter)?; + // } + // WorkflowItem::ParameterMetadata(pm) => { + // pm.format(&mut parameter_meta_section_str, formatter)?; + // } + // WorkflowItem::Input(i) => { + // i.format(&mut input_section_str, formatter)?; + // } + // WorkflowItem::Call(c) => { + // c.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Conditional(c) => { + // c.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Scatter(s) => { + // s.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Declaration(d) => { + // Decl::Bound(d).format(&mut body_str, formatter)?; + // } + // WorkflowItem::Output(o) => { + // o.format(&mut output_section_str, formatter)?; + // } + // WorkflowItem::Hints(h) => { + // h.format(&mut hints_section_str, formatter)?; + // } + // } + // } + + // let mut first_section = true; + // if !meta_section_str.is_empty() { + // first_section = false; + // write!(writer, "{}", meta_section_str)?; + // } + // if !parameter_meta_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", parameter_meta_section_str)?; + // } + // if !input_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", input_section_str)?; + // } + // if !body_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", body_str)?; + // } + // if !output_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", output_section_str)?; + // } + // if !hints_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", hints_section_str)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} diff --git a/backup/wdl-format-old-2/tests/format.rs b/backup/wdl-format-old-2/tests/format.rs new file mode 100644 index 000000000..d7d6dc5d8 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format.rs @@ -0,0 +1,192 @@ +//! The format file tests. +//! +//! This test looks for directories in `tests/format`. +//! +//! Each directory is expected to contain: +//! +//! * `source.wdl` - the test input source to parse. +//! * `source.formatted` - the expected formatted output. +//! +//! 
The `source.formatted` file may be automatically generated or updated by
+//! setting the `BLESS` environment variable when running this test.
+
+use std::collections::HashSet;
+use std::env;
+use std::ffi::OsStr;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::exit;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term;
+use codespan_reporting::term::termcolor::Buffer;
+use codespan_reporting::term::Config;
+use colored::Colorize;
+use pretty_assertions::StrComparison;
+use rayon::prelude::*;
+use wdl_ast::Diagnostic;
+use wdl_format::format_document;
+
+fn find_tests() -> Vec<PathBuf> {
+    // Check for filter arguments consisting of test names
+    let mut filter = HashSet::new();
+    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
+        if !arg.starts_with('-') {
+            filter.insert(arg);
+        }
+    }
+
+    let mut tests: Vec<PathBuf> = Vec::new();
+    for entry in Path::new("tests/format").read_dir().unwrap() {
+        let entry = entry.expect("failed to read directory");
+        let path = entry.path();
+        if !path.is_dir()
+            || (!filter.is_empty()
+                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
+        {
+            continue;
+        }
+
+        tests.push(path);
+    }
+
+    tests.sort();
+    tests
+}
+
+fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
+    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
+    let mut buffer = Buffer::no_color();
+    for diagnostic in diagnostics {
+        term::emit(
+            &mut buffer,
+            &Config::default(),
+            &file,
+            &diagnostic.to_codespan(),
+        )
+        .expect("should emit");
+    }
+
+    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
+}
+
+fn compare_result(path: &Path, result: &str) -> Result<(), String> {
+    if env::var_os("BLESS").is_some() {
+        fs::write(path, &result).map_err(|e| {
+            format!(
+                "failed to write result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?;
+        return Ok(());
+    }
+
+    let expected = fs::read_to_string(path)
+        .map_err(|e| {
+            format!(
+                "failed to read result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?
+        .replace("\r\n", "\n");
+
+    if expected != result {
+        return Err(format!(
+            "result is not as expected:\n{}",
+            StrComparison::new(&expected, &result),
+        ));
+    }
+
+    Ok(())
+}
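+
+// To (re)generate the expected output for a test, run the suite with the
+// `BLESS` environment variable set; a typical invocation (assuming a Unix
+// shell) would be `BLESS=1 cargo test --test format`.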
{ok}", ok = "ok".green()); + None + } + } + }) { + Ok(result) => result, + Err(e) => { + println!( + "test {test_name} ... {panicked}", + panicked = "panicked".red() + ); + Some(( + test_name, + format!( + "test panicked: {e:?}", + e = e + .downcast_ref::() + .map(|s| s.as_str()) + .or_else(|| e.downcast_ref::<&str>().copied()) + .unwrap_or("no panic message") + ), + )) + } + } + }) + .collect::>(); + + if !errors.is_empty() { + eprintln!( + "\n{count} test(s) {failed}:", + count = errors.len(), + failed = "failed".red() + ); + + for (name, msg) in errors.iter() { + eprintln!("{name}: {msg}", msg = msg.red()); + } + + exit(1); + } + + println!( + "\ntest result: ok. {} passed\n", + ntests.load(Ordering::SeqCst) + ); +} diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt new file mode 100644 index 000000000..d9a98e06c --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt @@ -0,0 +1,25 @@ +'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl +on the date 08-05-2024. +It was accompanied by the following license: + +MIT License + +Copyright (c) 2017 ENCODE DCC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl new file mode 100644 index 000000000..7c8de0324 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -0,0 +1 @@ +version 1.0 diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl new file mode 100644 index 000000000..92c09ea84 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl @@ -0,0 +1,3296 @@ +version 1.0 + +struct RuntimeEnvironment { + String docker + String singularity + String conda +} + +workflow chip { + String pipeline_ver = 'v2.2.2' + + meta { + version: 'v2.2.2' + + author: 'Jin wook Lee' + email: 'leepc12@gmail.com' + description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil.' 
+ organization: 'ENCODE DCC' + + specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing' + + default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2' + default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json' + + parameter_group: { + runtime_environment: { + title: 'Runtime environment', + description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.' + }, + pipeline_metadata: { + title: 'Pipeline metadata', + description: 'Metadata for a pipeline (e.g. title and description).' + }, + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.', + help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.' + }, + input_genomic_data: { + title: 'Input genomic data', + description: 'Genomic input files for experiment.', + help: 'Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].' + }, + input_genomic_data_control: { + title: 'Input genomic data (control)', + description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.', + help: 'Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.' + }, + pipeline_parameter: { + title: 'Pipeline parameter', + description: 'Pipeline type and flags to turn on/off analyses.', + help: 'Use chip.align_only to align FASTQs without peak calling.' 
+ }, + alignment: { + title: 'Alignment', + description: 'Parameters for alignment.', + help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.' + }, + peak_calling: { + title: 'Peak calling', + description: 'Parameters for peak calling.', + help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.' + }, + resource_parameter: { + title: 'Resource parameter', + description: 'Number of CPUs (threads), max. memory and walltime for tasks.', + help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.' + } + } + } + input { + # group: runtime_environment + String docker = 'encodedcc/chip-seq-pipeline:v2.2.2' + String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + String conda = 'encd-chip' + String conda_macs2 = 'encd-chip-macs2' + String conda_spp = 'encd-chip-spp' + + # group: pipeline_metadata + String title = 'Untitled' + String description = 'No description' + + # group: reference_genome + File? genome_tsv + String? genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + + Boolean? 
ctl_paired_end + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = 'bowtie2' + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = 'auto' + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = 'picard' + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? 
gc_bias_picard_java_heap + } + + parameter_meta { + docker: { + description: 'Default Docker image URI to run WDL tasks.', + group: 'runtime_environment', + example: 'ubuntu:20.04' + } + singularity: { + description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.', + group: 'runtime_environment', + example: 'docker://ubuntu:20.04' + } + conda: { + description: 'Default Conda environment name to run WDL tasks. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip' + } + conda_macs2: { + description: 'Conda environment name for task macs2. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip-macs2' + } + conda_spp: { + description: 'Conda environment name for tasks spp/xcor. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip-spp' + } + title: { + description: 'Experiment title.', + group: 'pipeline_metadata', + example: 'ENCSR936XTK (subsampled 1/50)' + } + description: { + description: 'Experiment description.', + group: 'pipeline_metadata', + example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)' + } + genome_tsv: { + description: 'Reference genome database TSV.', + group: 'reference_genome', + help: 'This TSV files includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). You can still invidiaully define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.', + example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv' + } + genome_name: { + description: 'Genome name.', + group: 'reference_genome' + } + ref_fa: { + description: 'Reference FASTA file.', + group: 'reference_genome' + } + bowtie2_idx_tar: { + description: 'BWA index TAR file.', + group: 'reference_genome' + } + custom_aligner_idx_tar: { + description: 'Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.', + group: 'reference_genome' + } + chrsz: { + description: '2-col chromosome sizes file.', + group: 'reference_genome' + } + blacklist: { + description: 'Blacklist file in BED format.', + group: 'reference_genome', + help: 'Peaks will be filtered with this file.' + } + blacklist2: { + description: 'Secondary blacklist file in BED format.', + group: 'reference_genome', + help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.' + } + mito_chr_name: { + description: 'Mitochondrial chromosome name.', + group: 'reference_genome', + help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.' + } + regex_bfilt_peak_chr_name: { + description: 'Reg-ex for chromosomes to keep while filtering peaks.', + group: 'reference_genome', + help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.' + } + gensz: { + description: 'Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd columnin chromosome sizes file.', + group: 'reference_genome' + } + paired_end: { + description: 'Sequencing endedness.', + group: 'input_genomic_data', + help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.', + example: true + } + paired_ends: { + description: 'Sequencing endedness array (for mixed SE/PE datasets).', + group: 'input_genomic_data', + help: 'Whether each biological replicate is paired ended or not.' 
+ } + fastqs_rep1_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 1.', + group: 'input_genomic_data', + help: 'Define if you want to start pipeline from FASTQs files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type and fill paramters for that type and leave other undefined. Especially for FASTQs, we have individual variable for each biological replicate to allow FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz' + ] + } + fastqs_rep1_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 1.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz' + ] + } + fastqs_rep2_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 2.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz' + ] + } + fastqs_rep2_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 2.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz' + ] + } + fastqs_rep3_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 3.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep3_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 3.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep4_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 4.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep4_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 4.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep5_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 5.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.' 
+ } + fastqs_rep5_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 5.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep9_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep9_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' + } + bams: { + description: 'List of unfiltered/raw BAM files for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].' 
+        }
+        nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from filtered BAM files. Filtered/deduped BAM files. One entry per biological replicate, e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
+        }
+        tas: {
+            description: 'List of TAG-ALIGN files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. One entry per biological replicate, e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
+        }
+        peaks: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. One entry per biological replicate, e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.'
+        }
+        peaks_pr1: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peaks_pr2: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peak_pooled: {
+            description: 'NARROWPEAK file for pooled true replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
+        }
+        peak_ppr1: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
+        }
+        peak_ppr2: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
+        }
+
+        ctl_paired_end: {
+            description: 'Sequencing endedness for all controls.',
+            group: 'input_genomic_data_control',
+            help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.'
+        }
+        ctl_paired_ends: {
+            description: 'Sequencing endedness array for mixed SE/PE controls.',
+            group: 'input_genomic_data_control',
+            help: 'Whether each control replicate is paired ended or not.'
+        }
+        ctl_fastqs_rep1_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start the pipeline from FASTQ files. The pipeline can start from any type of control input (e.g. FASTQs, BAMs, ...). Choose one type, fill in the parameters for that type and leave the others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep1_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep3_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep3_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep5_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 5.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep5_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 5.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep6_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 6.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep6_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 6.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep7_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 7.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep7_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 7.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep8_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 8.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep8_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 8.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep9_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 9.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep9_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 9.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep10_R1: {
+            description: 'Read1 FASTQs to be merged for control replicate 10.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep10_R2: {
+            description: 'Read2 FASTQs to be merged for control replicate 10.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_bams: {
+            description: 'List of unfiltered/raw BAM files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start the pipeline from BAM files. Unfiltered/raw BAM files generated from an aligner (e.g. bowtie2). One entry per control replicate, e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].'
+        }
+        ctl_nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start the pipeline from filtered BAM files. Filtered/deduped BAM files. One entry per control replicate, e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].'
+        }
+        ctl_tas: {
+            description: 'List of TAG-ALIGN files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start the pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. One entry per control replicate, e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].'
+        }
+
+        pipeline_type: {
+            description: 'Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.',
+            group: 'pipeline_parameter',
+            help: 'The default peak caller is different for each type: spp for TF ChIP-Seq and macs2 for Histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.',
+            choices: ['tf', 'histone', 'control'],
+            example: 'tf'
+        }
+        redact_nodup_bam: {
+            description: 'Redact filtered/nodup BAM.',
+            group: 'pipeline_parameter',
+            help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.'
+        }
+        align_only: {
+            description: 'Align only mode.',
+            group: 'pipeline_parameter',
+            help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.'
+        }
+        true_rep_only: {
+            description: 'Disables all analyses related to pseudo-replicates.',
+            group: 'pipeline_parameter',
+            help: 'The pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
+        }
+        enable_count_signal_track: {
+            description: 'Enables generation of count signal tracks.',
+            group: 'pipeline_parameter'
+        }
+        enable_jsd: {
+            description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
+            group: 'pipeline_parameter'
+        }
+        enable_gc_bias: {
+            description: 'Enables GC bias calculation.',
+            group: 'pipeline_parameter'
+        }
+
+        aligner: {
+            description: 'Aligner. bowtie2, bwa or custom.',
+            group: 'alignment',
+            help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            choices: ['bowtie2', 'bwa', 'custom'],
+            example: 'bowtie2'
+        }
+        custom_align_py: {
+            description: 'Python script for a custom aligner.',
+            group: 'alignment',
+            help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
+        }
+        use_bwa_mem_for_pe: {
+            description: 'For paired end datasets with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
+            group: 'alignment',
+            help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
+        }
+        bwa_mem_read_len_limit: {
+            description: 'Read length limit for bwa mem (for PE FASTQs only).',
+            group: 'alignment',
+            help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
+        }
+        use_bowtie2_local_mode: {
+            description: 'Use bowtie2\'s local mode (soft-clipping).',
+            group: 'alignment',
+            help: 'This will add --local to the bowtie2 command line so that it will replace the default end-to-end mode.'
+        }
+        crop_length: {
+            description: 'Crop FASTQs\' reads longer than this length.',
+            group: 'alignment',
+            help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.'
+        }
+        crop_length_tol: {
+            description: 'Tolerance for cropping reads in FASTQs.',
+            group: 'alignment',
+            help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.'
+        }
+        trimmomatic_phred_score_format: {
+            description: 'Base encoding (format) for Phred score in FASTQs.',
+            group: 'alignment',
+            choices: ['auto', 'phred33', 'phred64'],
+            help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".'
+        }
+        xcor_trim_bp: {
+            description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'This does not affect alignment of experimental/control replicates. The pipeline additionally aligns the R1 FASTQ separately, for cross-correlation analysis only; this parameter is used for that alignment.'
+        }
+        use_filt_pe_ta_for_xcor: {
+            description: 'Use filtered PE BAM for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'If not defined, the pipeline uses the SE BAM generated from the trimmed read1 FASTQ for cross-correlation analysis.'
+        }
+        dup_marker: {
+            description: 'Marker for duplicate reads. picard or sambamba.',
+            group: 'alignment',
+            help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.',
+            choices: ['picard', 'sambamba'],
+            example: 'picard'
+        }
+        no_dup_removal: {
+            description: 'Disable removal of duplicate reads during filtering BAM.',
+            group: 'alignment',
+            help: 'Duplicate reads are filtered out while filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have the .nodup. suffix in its filename.'
+        }
+        mapq_thresh: {
+            description: 'Threshold for low MAPQ reads removal.',
+            group: 'alignment',
+            help: 'Low MAPQ reads are filtered out while filtering BAM.'
+        }
+        filter_chrs: {
+            description: 'List of chromosomes to be filtered out while filtering BAM.',
+            group: 'alignment',
+            help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact words for chromosome names.'
+        }
+        subsample_reads: {
+            description: 'Subsample reads. Shuffle and subsample reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering the experiment BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in the BAM exceeds this number. 0 means disabled.'
+        }
+        ctl_subsample_reads: {
+            description: 'Subsample control reads. Shuffle and subsample control reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering the control BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in the BAM exceeds this number. 0 means disabled.'
+        }
+        xcor_subsample_reads: {
+            description: 'Subsample reads for cross-correlation analysis only.',
+            group: 'alignment',
+            help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
+        }
+        xcor_exclusion_range_min: {
+            description: 'Exclusion minimum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.'
+        }
+        xcor_exclusion_range_max: {
+            description: 'Exclusion maximum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. If not defined, default values of `max(read_len + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.'
+        }
+        pseudoreplication_random_seed: {
+            description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).',
+            group: 'alignment',
+            help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then the pipeline uses the input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
+        }
+        ctl_depth_limit: {
+            description: 'Hard limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If the control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.'
+        }
+        exp_ctl_depth_ratio_limit: {
+            description: 'Second limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If the control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than the experiment replicate\'s read depth multiplied by this factor, then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.'
+        }
+        fraglen: {
+            description: 'Fragment length for each biological replicate.',
+            group: 'peak_calling',
+            help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when the pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.'
+        }
+        peak_caller: {
+            description: 'Peak caller.',
+            group: 'peak_calling',
+            help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. For example, you can use macs2 for TF ChIP-Seq even though spp is the default for TF ChIP-Seq (chip.pipeline_type == tf).',
+            example: 'spp'
+        }
+        always_use_pooled_ctl: {
+            description: 'Always choose a pooled control for each experiment replicate.',
+            group: 'peak_calling',
+            help: 'If turned on, ignores chip.ctl_depth_ratio.'
+        }
+        ctl_depth_ratio: {
+            description: 'Maximum depth ratio between control replicates.',
+            group: 'peak_calling',
+            help: 'If the ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.'
+        }
+
+        cap_num_peak: {
+            description: 'Upper limit on the number of peaks.',
+            group: 'peak_calling',
+            help: 'It is 300000 and 500000 by default for spp and macs2, respectively.'
+        }
+        pval_thresh: {
+            description: 'p-value threshold for the MACS2 peak caller.',
+            group: 'peak_calling',
+            help: 'macs2 callpeak -p'
+        }
+        fdr_thresh: {
+            description: 'FDR threshold for the spp peak caller (phantompeakqualtools).',
+            group: 'peak_calling',
+            help: 'run_spp.R -fdr='
+        }
+        idr_thresh: {
+            description: 'IDR threshold.',
+            group: 'peak_calling'
+        }
+
+        align_cpu: {
+            description: 'Number of cores for task align.',
+            group: 'resource_parameter',
+            help: 'Task align merges/crops/maps FASTQs.'
+        }
+        align_bowtie2_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        align_bwa_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task align with bwa as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        align_time_hr: {
+            description: 'Walltime (h) required for task align.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        align_bowtie2_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        align_bwa_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        filter_cpu: {
+            description: 'Number of cores for task filter.',
+            group: 'resource_parameter',
+            help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.'
+        }
+        filter_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task filter.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of BAMs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        filter_time_hr: {
+            description: 'Walltime (h) required for task filter.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        filter_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task filter.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of BAMs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        bam2ta_cpu: {
+            description: 'Number of cores for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'Task bam2ta converts filtered/deduped BAM into TAG-ALIGN (6-col BED) format.'
+        }
+        bam2ta_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        bam2ta_time_hr: {
+            description: 'Walltime (h) required for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        bam2ta_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        spr_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        spr_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        jsd_cpu: {
+            description: 'Number of cores for task jsd.',
+            group: 'resource_parameter',
+            help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.'
+        }
+        jsd_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        jsd_time_hr: {
+            description: 'Walltime (h) required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        jsd_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine the required disk size of an instance on GCP/AWS.'
+        }
+        xcor_cpu: {
+            description: 'Number of cores for task xcor.',
+            group: 'resource_parameter',
+            help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
+        }
+        xcor_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        xcor_time_hr: {
+            description: 'Walltime (h) required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        xcor_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required disk size of an instance on GCP/AWS.'
+        }
+        subsample_ctl_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        subsample_ctl_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required disk size of an instance on GCP/AWS.'
+        }
+        call_peak_cpu: {
+            description: 'Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.',
+            group: 'resource_parameter',
+            help: 'Task call_peak calls peaks on TAG-ALIGNs using the SPP/MACS2 peak caller. MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.'
+        }
+        call_peak_spp_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_macs2_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_time_hr: {
+            description: 'Walltime (h) required for task call_peak.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        call_peak_spp_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required disk size of an instance on GCP/AWS.'
+        }
+        call_peak_macs2_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required disk size of an instance on GCP/AWS.'
+        }
+        macs2_signal_track_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required memory of an instance (GCP/AWS) or job (HPCs).'
+        }
+        macs2_signal_track_time_hr: {
+            description: 'Walltime (h) required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        macs2_signal_track_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine the required disk size of an instance on GCP/AWS.'
+        }
+        align_trimmomatic_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task align.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.'
+        }
+        filter_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task filter.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.'
+        }
+        gc_bias_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task gc_bias.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.'
+        }
+    }
+    RuntimeEnvironment runtime_environment = {
+        'docker': docker, 'singularity': singularity, 'conda': conda
+    }
+    RuntimeEnvironment runtime_environment_spp = {
+        'docker': docker, 'singularity': singularity, 'conda': conda_spp
+    }
+    RuntimeEnvironment runtime_environment_macs2 = {
+        'docker': docker, 'singularity': singularity, 'conda': conda_macs2
+    }
+
+    # read genome data and paths
+    if ( defined(genome_tsv) ) {
+        call read_genome_tsv { input:
+            genome_tsv = genome_tsv,
+            runtime_environment = runtime_environment
+        }
+    }
+    File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa])
+    File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar
+        else read_genome_tsv.bwa_idx_tar
+    File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar])
+    File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz])
+    String gensz_ = select_first([gensz, read_genome_tsv.gensz])
+    File? blacklist1_ = if defined(blacklist) then blacklist
+        else read_genome_tsv.blacklist
+    File? blacklist2_ = if defined(blacklist2) then blacklist2
+        else read_genome_tsv.blacklist2
+    # merge multiple blacklists
+    # two blacklists can have different number of columns (3 vs 6)
+    # so we limit merged blacklist's columns to 3
+    Array[File] blacklists = select_all([blacklist1_, blacklist2_])
+    if ( length(blacklists) > 1 ) {
+        call pool_ta as pool_blacklist { input:
+            tas = blacklists,
+            col = 3,
+            runtime_environment = runtime_environment
+        }
+    }
+    File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled
+        else if length(blacklists) > 0 then blacklists[0]
+        else blacklist2_
+    String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name])
+    String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name])
+    String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)])
+
+    ### temp vars (do not define these)
+    String aligner_ = if defined(custom_align_py) then 'custom' else aligner
+    String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp'])
+        else select_first([peak_caller, 'macs2'])
+    String peak_type_ = if peak_caller_=='spp' then 'regionPeak'
+        else 'narrowPeak'
+    Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only
+    String idr_rank_ = if peak_caller_=='spp' then 'signal.value'
+        else if peak_caller_=='macs2' then 'p.value'
+        else 'p.value'
+    Int cap_num_peak_spp = 300000
+    Int cap_num_peak_macs2 = 500000
+    Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp])
+        else select_first([cap_num_peak, cap_num_peak_macs2])
+    Int mapq_thresh_ = mapq_thresh
+    Boolean enable_xcor_ = if pipeline_type=='control' then false else true
+    Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track
+    Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd
+    Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias
+    Boolean align_only_ = if pipeline_type=='control' then true else align_only
+
+    Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor
+        else align_bwa_mem_factor
+    Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor
+        else align_bwa_disk_factor
+    Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor
+        else call_peak_macs2_mem_factor
+    Float call_peak_disk_factor_ = if peak_caller_ =='spp' then call_peak_spp_disk_factor
+        else call_peak_macs2_disk_factor
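+    # Worked example of the derived temp vars above (illustrative only, not part of
+    # the pipeline): for a hypothetical TF run with pipeline_type == 'tf' and
+    # chip.peak_caller left undefined, they resolve to peak_caller_ == 'spp',
+    # peak_type_ == 'regionPeak', enable_idr == true and cap_num_peak_ == 300000;
+    # the same run with pipeline_type == 'histone' would instead resolve to
+    # 'macs2' / 'narrowPeak' / false / 500000.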
+    # temporary 2-dim fastqs array [rep_id][merge_id]
+    Array[Array[File]] fastqs_R1 =
+        if length(fastqs_rep10_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1]
+        else if length(fastqs_rep9_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1]
+        else if length(fastqs_rep8_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1]
+        else if length(fastqs_rep7_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1]
+        else if length(fastqs_rep6_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1]
+        else if length(fastqs_rep5_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1]
+        else if length(fastqs_rep4_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1]
+        else if length(fastqs_rep3_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1]
+        else if length(fastqs_rep2_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1]
+        else if length(fastqs_rep1_R1)>0 then
+            [fastqs_rep1_R1]
+        else []
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] fastqs_R2 =
+        [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2,
+        fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2]
+
+    # temporary 2-dim ctl fastqs array [rep_id][merge_id]
+    Array[Array[File]] ctl_fastqs_R1 =
+        if length(ctl_fastqs_rep10_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1]
+        else if length(ctl_fastqs_rep9_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1]
+        else if length(ctl_fastqs_rep8_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1]
+        else if length(ctl_fastqs_rep7_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1]
+        else if length(ctl_fastqs_rep6_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1]
+        else if length(ctl_fastqs_rep5_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1]
+        else if length(ctl_fastqs_rep4_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1]
+        else if length(ctl_fastqs_rep3_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1]
+        else if length(ctl_fastqs_rep2_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1]
+        else if length(ctl_fastqs_rep1_R1)>0 then
+            [ctl_fastqs_rep1_R1]
+        else []
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
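+    # Illustration of the [rep_id][merge_id] layout above (hypothetical file names):
+    # with two biological replicates, each merged from two technical replicates,
+    # fastqs_R1 resolves to
+    #     [['rep1_a.R1.fq.gz', 'rep1_b.R1.fq.gz'],
+    #      ['rep2_a.R1.fq.gz', 'rep2_b.R1.fq.gz']]
+    # and, since rep3 and later are empty, the if/else chain stops at the rep2 case.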
+    Array[Array[File]] ctl_fastqs_R2 =
+        [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2,
+        ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2]
+
+    # temporary variables to get number of replicates
+    # WDLic implementation of max(A,B,C,...)
+    # (see the worked example after the sanity checks below)
+    Int num_rep_fastq = length(fastqs_R1)
+    Int num_rep_bam = if length(bams)<num_rep_fastq then num_rep_fastq
+        else length(bams)
+    Int num_rep_nodup_bam = if length(nodup_bams)<num_rep_bam then num_rep_bam
+        else length(nodup_bams)
+    Int num_rep_ta = if length(tas)<num_rep_nodup_bam then num_rep_nodup_bam
+        else length(tas)
+    Int num_rep_peak = if length(peaks)<num_rep_ta then num_rep_ta
+        else length(peaks)
+    Int num_rep = num_rep_peak
+
+    Int num_ctl_fastq = length(ctl_fastqs_R1)
+    Int num_ctl_bam = if length(ctl_bams)<num_ctl_fastq then num_ctl_fastq
+        else length(ctl_bams)
+    Int num_ctl_nodup_bam = if length(ctl_nodup_bams)<num_ctl_bam then num_ctl_bam
+        else length(ctl_nodup_bams)
+    Int num_ctl_ta = if length(ctl_tas)<num_ctl_nodup_bam then num_ctl_nodup_bam
+        else length(ctl_tas)
+    Int num_ctl = num_ctl_ta
+
+    # sanity check for inputs
+    if ( (num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) {
+        call raise_exception as error_wrong_aligner { input:
+            msg = 'Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) {
+        call raise_exception as error_use_bwa_mem_for_non_bwa { input:
+            msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) {
+        call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
+            msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) {
+        call raise_exception as error_custom_aligner { input:
+            msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) {
+        call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input:
+            msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_ratio_limit">0) for ' +
+                'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' +
+                'Automatic control subsampling is enabled by default. ' +
+                'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' +
+                'You can still use manual control subsampling ("chip.ctl_subsample_reads">0) since it is done ' +
+                'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_ctl > 0 ) {
+        call raise_exception as error_ctl_input_defined_in_control_mode { input:
+            msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_rep_fastq == 0 ) {
+        call raise_exception as error_ctl_fastq_input_required_for_control_mode { input:
+            msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. Pipeline will recognize them as control FASTQs.',
+            runtime_environment = runtime_environment
+        }
+    }
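+    # Worked example of the max(A,B,C,...) chain above (illustrative): starting a
+    # hypothetical run from 3 BAMs with no FASTQs/TAG-ALIGNs/peaks defined,
+    # num_rep_fastq == 0, so num_rep_bam == 3, and each later step keeps the running
+    # maximum, giving num_rep == 3 == max(#fastqs, #bams, #nodup_bams, #tas, #peaks).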
+    # align each replicate
+    scatter(i in range(num_rep)) {
+        # to override endedness definition for individual replicate
+        #   paired_end will override paired_ends[i]
+        Boolean paired_end_ = if !defined(paired_end) && i<length(paired_ends) then paired_ends[i]
+            else select_first([paired_end, false])
+
+        Boolean has_input_of_align = i<length(fastqs_R1) && length(fastqs_R1[i])>0
+        Boolean has_output_of_align = i<length(bams)
+    }
+
+    # align each control
+    scatter(i in range(num_ctl)) {
+        Boolean has_output_of_align_ctl = i<length(ctl_bams)
+    }
+
+    # if there are TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta = length(select_all(ta_))==num_rep
+    if ( has_all_inputs_of_pool_ta && num_rep>1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta { input :
+            tas = ta_,
+            prefix = 'rep',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr1 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 1
+        call pool_ta as pool_ta_pr1 { input :
+            tas = spr.ta_pr1,
+            prefix = 'rep-pr1',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr2 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 2
+        call pool_ta as pool_ta_pr2 { input :
+            tas = spr.ta_pr2,
+            prefix = 'rep-pr2',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are CTL TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl
+    if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta as pool_ta_ctl { input :
+            tas = ctl_ta_,
+            prefix = 'ctl',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled)
+    if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) {
+        call count_signal_track as count_signal_track_pooled { input :
+            ta = pool_ta.ta_pooled,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep
+    if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) {
+        # fingerprint and JS-distance plot
+        call jsd { input :
+            nodup_bams = nodup_bam_,
+            ctl_bams = ctl_nodup_bam_, # use first control only
+            blacklist = blacklist_,
+            mapq_thresh = mapq_thresh_,
+
+            cpu = jsd_cpu,
+            mem_factor = jsd_mem_factor,
+            time_hr = jsd_time_hr,
+            disk_factor = jsd_disk_factor,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep
+        && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0
+    if ( has_all_input_of_choose_ctl && !align_only_ ) {
+        # choose appropriate control for each exp IP replicate
+        # outputs:
+        #   choose_ctl.idx : control replicate index for each exp replicate
+        #                    -1 means pooled ctl replicate
+        call choose_ctl { input:
+            tas = ta_,
+            ctl_tas = ctl_ta_,
+            ta_pooled = pool_ta.ta_pooled,
+            ctl_ta_pooled = pool_ta_ctl.ta_pooled,
+            always_use_pooled_ctl = always_use_pooled_ctl,
+            ctl_depth_ratio = ctl_depth_ratio,
+            ctl_depth_limit = ctl_depth_limit,
+            exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    scatter(i in range(num_rep)) {
+        # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]]
+        # chosen_ctl_ta_id
+        #   >=0: control TA index (this means that control TA with this index exists)
+        #   -1: use pooled control
+        #   -2: there is no control
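+        # Worked example of the encoding above (illustrative, not from a real run):
+        # with 2 experiment replicates and 2 controls, choose_ctl might yield
+        # chosen_ctl_ta_ids == [1, -1], i.e. rep1 is paired with control TA index 1
+        # while rep2 uses the pooled control; a run without any controls would
+        # yield -2 for every replicate.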
+        Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then
+            select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2
+        Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then
+            select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0
+        Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false
+            else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0]
+            else ctl_paired_end_[chosen_ctl_ta_id]
+
+        if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) {
+            call subsample_ctl { input:
+                ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled
+                    else ctl_ta_[ chosen_ctl_ta_id ],
+                subsample = chosen_ctl_ta_subsample,
+                paired_end = chosen_ctl_paired_end,
+                mem_factor = subsample_ctl_mem_factor,
+                disk_factor = subsample_ctl_disk_factor,
+                runtime_environment = runtime_environment
+            }
+        }
+        Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then []
+            else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ]
+            else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ]
+            else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ]
+    }
+    Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then
+        select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0
+
+    # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int]))
+    Array[Int] fraglen_tmp = select_all(fraglen_)
+
+    # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks
+    scatter(i in range(num_rep)) {
+        Boolean has_input_of_call_peak = defined(ta_[i])
+        Boolean has_output_of_call_peak = i<length(peaks)
+    }
+
+    # if ( num_rep > 1 ) {
+    # rounded mean of fragment length, which will be used for
+    #  1) calling peaks for pooled true/pseudo replicates
+    #  2) calculating FRiP
+    call rounded_mean as fraglen_mean { input :
+        ints = fraglen_tmp,
+        runtime_environment = runtime_environment
+    }
+    # }
+
+    if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) {
+        call subsample_ctl as subsample_ctl_pooled { input:
+            ta = if num_ctl < 2 then ctl_ta_[0]
+                else pool_ta_ctl.ta_pooled,
+            subsample = chosen_ctl_ta_pooled_subsample,
+            paired_end = ctl_paired_end_[0],
+            mem_factor = subsample_ctl_mem_factor,
+            disk_factor = subsample_ctl_disk_factor,
+            runtime_environment = runtime_environment
+        }
+    }
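+    # Note on the declaration below: it holds at most one pooled control TA, but it is
+    # typed as an array so that an empty array can stand in for "no control chosen" and
+    # so it can be concatenated onto the experiment TAs with
+    # flatten([select_all([...]), chosen_ctl_ta_pooled]) when building call_peak inputs.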
+    # actually not an array
+    Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then []
+        else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ]
+        else if num_ctl < 2 then [ ctl_ta_[0] ]
+        else [ pool_ta_ctl.ta_pooled ]
+
+    Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled)
+    Boolean has_output_of_call_peak_pooled = defined(peak_pooled)
+    if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) {
+        # call peaks on pooled replicate
+        # always call peaks for pooled replicate to get signal tracks
+        call call_peak as call_peak_pooled { input :
+            peak_caller = peak_caller_,
+            peak_type = peak_type_,
+            tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]),
+            gensz = gensz_,
+            chrsz = chrsz_,
+            cap_num_peak = cap_num_peak_,
+            pval_thresh = pval_thresh,
+            fdr_thresh = fdr_thresh,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+
+            cpu = call_peak_cpu,
+            mem_factor = call_peak_mem_factor_,
+            disk_factor = call_peak_disk_factor_,
+            time_hr = call_peak_time_hr,
+            runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp
+                else if peak_caller_ == 'macs2' then runtime_environment_macs2
+                else runtime_environment
+        }
+    }
+    File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled
+        else call_peak_pooled.peak
+
+    # macs2 signal track for pooled rep
+    if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) {
+        call macs2_signal_track as macs2_signal_track_pooled { input :
+            tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]),
+            gensz = gensz_,
+            chrsz = chrsz_,
+            pval_thresh = pval_thresh,
+            fraglen = fraglen_mean.rounded_mean,
+
+            mem_factor = macs2_signal_track_mem_factor,
+            disk_factor = macs2_signal_track_disk_factor,
+            time_hr = macs2_signal_track_time_hr,
+            runtime_environment = runtime_environment_macs2
+        }
+    }
+
+    Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled)
+    Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1)
+    if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) {
+        # call peaks on 1st pooled pseudo replicates
+        call call_peak as call_peak_ppr1 { input :
+            peak_caller = peak_caller_,
+            peak_type = peak_type_,
+            tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]),
+            gensz = gensz_,
+            chrsz = chrsz_,
+            cap_num_peak = cap_num_peak_,
+            pval_thresh = pval_thresh,
+            fdr_thresh = fdr_thresh,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+
+            cpu = call_peak_cpu,
+            mem_factor = call_peak_mem_factor_,
+            disk_factor = call_peak_disk_factor_,
+            time_hr = call_peak_time_hr,
+            runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp
+                else if peak_caller_ == 'macs2' then runtime_environment_macs2
+                else runtime_environment
+        }
+    }
+    File? peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1
+        else call_peak_ppr1.peak
+
+    Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled)
+    Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2)
+    if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) {
+        # call peaks on 2nd pooled pseudo replicates
+        call call_peak as call_peak_ppr2 { input :
+            peak_caller = peak_caller_,
+            peak_type = peak_type_,
+            tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]),
+            gensz = gensz_,
+            chrsz = chrsz_,
+            cap_num_peak = cap_num_peak_,
+            pval_thresh = pval_thresh,
+            fdr_thresh = fdr_thresh,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+
+            cpu = call_peak_cpu,
+            mem_factor = call_peak_mem_factor_,
+            disk_factor = call_peak_disk_factor_,
+            time_hr = call_peak_time_hr,
+            runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp
+                else if peak_caller_ == 'macs2' then runtime_environment_macs2
+                else runtime_environment
+        }
+    }
+    File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2
+        else call_peak_ppr2.peak
+
+    # do IDR/overlap on all pairs of two replicates (i,j)
+    #   where i and j are zero-based indices and 0 <= i < j < num_rep
+    # (a worked example of the pair enumeration follows this section)
+    scatter( pair in cross(range(num_rep),range(num_rep)) ) {
+        # pair.left = 0-based index of 1st replicate
+        # pair.right = 0-based index of 2nd replicate
+        File? peak1_ = peak_[pair.left]
+        File? peak2_ = peak_[pair.right]
+        if ( !align_only_ && pair.left<pair.right ) {
+        }
+    }
+
+    if ( !align_only_ && !true_rep_only && num_rep > 1 ) {
+        # Naive overlap on pooled pseudo replicates
+        call overlap as overlap_ppr { input :
+            prefix = 'pooled-pr1_vs_pooled-pr2',
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) {
+        # IDR on pooled pseudo replicates
+        call idr as idr_ppr { input :
+            prefix = 'pooled-pr1_vs_pooled-pr2',
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            idr_thresh = idr_thresh,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            rank = idr_rank_,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # reproducibility QC for overlap/IDR peaks
+    if ( !align_only_ && !true_rep_only && num_rep > 0 ) {
+        # reproducibility QC for overlapping peaks
+        call reproducibility as reproducibility_overlap { input :
+            prefix = 'overlap',
+            peaks = select_all(overlap.bfilt_overlap_peak),
+            peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [],
+            peak_ppr = overlap_ppr.bfilt_overlap_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) {
+        # reproducibility QC for IDR peaks
+        call reproducibility as reproducibility_idr { input :
+            prefix = 'idr',
+            peaks = select_all(idr.bfilt_idr_peak),
+            peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [],
+            peak_ppr = idr_ppr.bfilt_idr_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
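+    # Worked example of the pair enumeration above (illustrative): for num_rep == 3,
+    # cross(range(3), range(3)) yields 9 ordered pairs, and the pair.left<pair.right
+    # guard keeps only (0,1), (0,2) and (1,2), i.e. each unordered pair of replicates
+    # exactly once.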
+    # Generate final QC report and JSON
+    call qc_report { input :
+        pipeline_ver = pipeline_ver,
+        title = title,
+        description = description,
+        genome = genome_name_,
+        paired_ends = paired_end_,
+        ctl_paired_ends = ctl_paired_end_,
+        pipeline_type = pipeline_type,
+        aligner = aligner_,
+        no_dup_removal = no_dup_removal,
+        peak_caller = peak_caller_,
+        cap_num_peak = cap_num_peak_,
+        idr_thresh = idr_thresh,
+        pval_thresh = pval_thresh,
+        xcor_trim_bp = xcor_trim_bp,
+        xcor_subsample_reads = xcor_subsample_reads,
+
+        samstat_qcs = select_all(align.samstat_qc),
+        nodup_samstat_qcs = select_all(filter.samstat_qc),
+        dup_qcs = select_all(filter.dup_qc),
+        lib_complexity_qcs = select_all(filter.lib_complexity_qc),
+        xcor_plots = select_all(xcor.plot_png),
+        xcor_scores = select_all(xcor.score),
+
+        ctl_samstat_qcs = select_all(align_ctl.samstat_qc),
+        ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc),
+        ctl_dup_qcs = select_all(filter_ctl.dup_qc),
+        ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc),
+
+        jsd_plot = jsd.plot,
+        jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [],
+
+        frip_qcs = select_all(call_peak.frip_qc),
+        frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc),
+        frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc),
+        frip_qc_pooled = call_peak_pooled.frip_qc,
+        frip_qc_ppr1 = call_peak_ppr1.frip_qc,
+        frip_qc_ppr2 = call_peak_ppr2.frip_qc,
+
+        idr_plots = select_all(idr.idr_plot),
+        idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [],
+        idr_plot_ppr = idr_ppr.idr_plot,
+        frip_idr_qcs = select_all(idr.frip_qc),
+        frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [],
+        frip_idr_qc_ppr = idr_ppr.frip_qc,
+        frip_overlap_qcs = select_all(overlap.frip_qc),
+        frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [],
+        frip_overlap_qc_ppr = overlap_ppr.frip_qc,
+        idr_reproducibility_qc = reproducibility_idr.reproducibility_qc,
+        overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc,
+
+        gc_plots = select_all(gc_bias.gc_plot),
+
+        peak_region_size_qcs = select_all(call_peak.peak_region_size_qc),
+        peak_region_size_plots = select_all(call_peak.peak_region_size_plot),
+        num_peak_qcs = select_all(call_peak.num_peak_qc),
+
+        idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc,
+        idr_opt_peak_region_size_plot = reproducibility_idr.peak_region_size_plot,
+        idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc,
+
+        overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc,
+        overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot,
+        overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc,
+
+        runtime_environment = runtime_environment
+    }
+
+    output {
+        File report = qc_report.report
+        File qc_json = qc_report.qc_json
+        Boolean qc_json_ref_match = qc_report.qc_json_ref_match
+    }
+}
+
+task align {
+    input {
+        Array[File] fastqs_R1       # [merge_id]
+        Array[File] fastqs_R2
+        File? ref_fa
+        Int? trim_bp                # this is for R1 only
+        Int crop_length
+        Int crop_length_tol
+        String? trimmomatic_phred_score_format
+
+        String aligner
+
+        String mito_chr_name
+        Int? multimapping
+        File? custom_align_py
+        File? idx_tar               # reference index tar
+        Boolean paired_end
+        Boolean use_bwa_mem_for_pe
+        Int bwa_mem_read_len_limit
+        Boolean use_bowtie2_local_mode
+        String? trimmomatic_java_heap
+        Int cpu
+        Float mem_factor
+        Int time_hr
+        Float disk_factor
+
+        RuntimeEnvironment runtime_environment
+    }
+    Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G")
+    Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb
+    Float samtools_mem_gb = 0.8 * mem_gb
+    Int disk_gb = round(40.0 + disk_factor * input_file_size_gb)
+
+    Float trimmomatic_java_heap_factor = 0.9
+    Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2])
+        else transpose([fastqs_R1])
+    command {
+        set -e
+
+        # check if pipeline dependencies can be found
+        if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]]
+        then
+            echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2
+            exit 3
+        fi
+        python3 $(which encode_task_merge_fastq.py) \
+            ${write_tsv(tmp_fastqs)} \
+            ${if paired_end then '--paired-end' else ''} \
+            ${'--nth ' + cpu}
+
+        if [ -z '${trim_bp}' ]; then
+            SUFFIX=
+        else
+            SUFFIX=_trimmed
+            python3 $(which encode_task_trim_fastq.py) \
+                R1/*.fastq.gz \
+                --trim-bp ${trim_bp} \
+                --out-dir R1$SUFFIX
+            if [ '${paired_end}' == 'true' ]; then
+                python3 $(which encode_task_trim_fastq.py) \
+                    R2/*.fastq.gz \
+                    --trim-bp ${trim_bp} \
+                    --out-dir R2$SUFFIX
+            fi
+        fi
+        if [ '${crop_length}' == '0' ]; then
+            SUFFIX=$SUFFIX
+        else
+            NEW_SUFFIX="$SUFFIX"_cropped
+            python3 $(which encode_task_trimmomatic.py) \
+                --fastq1 R1$SUFFIX/*.fastq.gz \
+                ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \
+                ${if paired_end then '--paired-end' else ''} \
+                --crop-length ${crop_length} \
+                --crop-length-tol "${crop_length_tol}" \
+                ${'--phred-score-format ' + trimmomatic_phred_score_format } \
+                --out-dir-R1 R1$NEW_SUFFIX \
+                ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \
+                ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \
+                ${'--nth ' + cpu}
+            SUFFIX=$NEW_SUFFIX
+        fi
+
+        if [ '${aligner}' == 'bwa' ]; then
+            python3 $(which encode_task_bwa.py) \
+                ${idx_tar} \
+                R1$SUFFIX/*.fastq.gz \
+                ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \
+                ${if paired_end then '--paired-end' else ''} \
+                ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \
+                ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \
+                ${'--mem-gb ' + samtools_mem_gb} \
+                ${'--nth ' + cpu}
+
+        elif [ '${aligner}' == 'bowtie2' ]; then
+            python3 $(which encode_task_bowtie2.py) \
+                ${idx_tar} \
+                R1$SUFFIX/*.fastq.gz \
+                ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \
+                ${'--multimapping ' + multimapping} \
+                ${if paired_end then '--paired-end' else ''} \
+                ${if use_bowtie2_local_mode then '--local' else ''} \
+                ${'--mem-gb ' + samtools_mem_gb} \
+                ${'--nth ' + cpu}
+        else
+            python3 ${custom_align_py} \
+                ${idx_tar} \
+                R1$SUFFIX/*.fastq.gz \
+                ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \
+                ${if paired_end then '--paired-end' else ''} \
+                ${'--mem-gb ' + samtools_mem_gb} \
+                ${'--nth ' + cpu}
+        fi
+
+        python3 $(which encode_task_post_align.py) \
+            R1$SUFFIX/*.fastq.gz $(ls *.bam) \
+            ${'--mito-chr-name ' + mito_chr_name} \
+            ${'--mem-gb ' + samtools_mem_gb} \
+            ${'--nth ' + cpu}
+        rm -rf R1 R2 R1$SUFFIX R2$SUFFIX
+    }
+    output {
+        File bam = glob('*.bam')[0]
+        File bai = glob('*.bai')[0]
+        File samstat_qc = glob('*.samstats.qc')[0]
+        File read_len_log = glob('*.read_length.txt')[0]
+    }
+    runtime {
+        cpu : cpu
+        memory : '${mem_gb} GB'
+        time : time_hr
+        disks : 'local-disk ${disk_gb} SSD'
+        preemptible: 0
+
+        docker : runtime_environment.docker
+        singularity : runtime_environment.singularity
+        conda : runtime_environment.conda
+    }
+}
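+# Worked example of the resource formulas in task align above (hypothetical numbers,
+# for illustration only): 2 GB of input FASTQs, a 4 GB index tar and
+# mem_factor == 0.15 give mem_gb == 5.0 + 4.0 + 0.15 * 2.0 == 9.3, of which 80%
+# (samtools_mem_gb == 7.44) is passed to samtools; with disk_factor == 8.0,
+# disk_gb == round(40.0 + 8.0 * 2.0) == 56.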
SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_filter.py) \ + ${bam} \ + ${if paired_end then '--paired-end' else ''} \ + --multimapping 0 \ + ${'--dup-marker ' + dup_marker} \ + ${'--mapq-thresh ' + mapq_thresh} \ + --filter-chrs ${sep=' ' filter_chrs} \ + ${'--chrsz ' + chrsz} \ + ${if no_dup_removal then '--no-dup-removal' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + + if [ '${redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ${'--ref-fa ' + ref_fa} \ + '--delete-original-bam' + fi + } + output { + File nodup_bam = glob('*.bam')[0] + File nodup_bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File dup_qc = glob('*.dup.qc')[0] + File lib_complexity_qc = glob('*.lib_complexity.qc')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task bam2ta { + input { + File? bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_bam2ta.py) \ + ${bam} \ + --disable-tn5-shift \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + } + output { + File ta = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task spr { + input { + File? 
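+ # Self pseudo-replication: the input tagAlign is split into two random halves
+ # (seeded by pseudoreplication_random_seed), emitted as *.pr1 and *.pr2.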
ta
+ Boolean paired_end
+ Int pseudoreplication_random_seed
+
+ Float mem_factor
+ Float disk_factor
+
+ RuntimeEnvironment runtime_environment
+ }
+ Float input_file_size_gb = size(ta, "G")
+ Float mem_gb = 4.0 + mem_factor * input_file_size_gb
+ Int disk_gb = round(20.0 + disk_factor * input_file_size_gb)
+
+ command {
+ set -e
+ python3 $(which encode_task_spr.py) \
+ ${ta} \
+ ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \
+ ${if paired_end then '--paired-end' else ''}
+ }
+ output {
+ File ta_pr1 = glob('*.pr1.tagAlign.gz')[0]
+ File ta_pr2 = glob('*.pr2.tagAlign.gz')[0]
+ }
+ runtime {
+ cpu : 1
+ memory : '${mem_gb} GB'
+ time : 4
+ disks : 'local-disk ${disk_gb} SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
+
+task pool_ta {
+ input {
+ Array[File?] tas
+ Int? col # number of columns in pooled TA
+ String? prefix # basename prefix
+
+ RuntimeEnvironment runtime_environment
+ }
+
+ command {
+ set -e
+ python3 $(which encode_task_pool_ta.py) \
+ ${sep=' ' select_all(tas)} \
+ ${'--prefix ' + prefix} \
+ ${'--col ' + col}
+ }
+ output {
+ File ta_pooled = glob('*.tagAlign.gz')[0]
+ }
+ runtime {
+ cpu : 1
+ memory : '8 GB'
+ time : 4
+ disks : 'local-disk 100 SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
+
+task xcor {
+ input {
+ File? ta
+ Boolean paired_end
+ String mito_chr_name
+ Int subsample # number of reads to subsample TAGALIGN
+ # this will be used for xcor only
+ # will not affect any downstream analysis
+ String? chip_seq_type
+ Int? exclusion_range_min
+ Int? exclusion_range_max
+
+ Int cpu
+ Float mem_factor
+ Int time_hr
+ Float disk_factor
+
+ RuntimeEnvironment runtime_environment
+ }
+ Float input_file_size_gb = size(ta, "G")
+ Float mem_gb = 8.0 + mem_factor * input_file_size_gb
+ Int disk_gb = round(20.0 + disk_factor * input_file_size_gb)
+
+ command {
+ set -e
+ python3 $(which encode_task_xcor.py) \
+ ${ta} \
+ ${if paired_end then '--paired-end' else ''} \
+ ${'--mito-chr-name ' + mito_chr_name} \
+ ${'--subsample ' + subsample} \
+ ${'--chip-seq-type ' + chip_seq_type} \
+ ${'--exclusion-range-min ' + exclusion_range_min} \
+ ${'--exclusion-range-max ' + exclusion_range_max} \
+ ${'--nth ' + cpu}
+ }
+ output {
+ File plot_pdf = glob('*.cc.plot.pdf')[0]
+ File plot_png = glob('*.cc.plot.png')[0]
+ File score = glob('*.cc.qc')[0]
+ File fraglen_log = glob('*.cc.fraglen.txt')[0]
+ Int fraglen = read_int(fraglen_log)
+ }
+ runtime {
+ cpu : cpu
+ memory : '${mem_gb} GB'
+ time : time_hr
+ disks : 'local-disk ${disk_gb} SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
+
+task jsd {
+ input {
+ Array[File?] nodup_bams
+ Array[File?] ctl_bams
+ File?
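+ # Fingerprint/Jensen-Shannon distance QC; reads overlapping this blacklist BED
+ # are excluded from the computation.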
blacklist + Int mapq_thresh + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_jsd.py) \ + ${sep=' ' select_all(nodup_bams)} \ + ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ + ${'--mapq-thresh '+ mapq_thresh} \ + ${'--blacklist '+ blacklist} \ + ${'--nth ' + cpu} + } + output { + File plot = glob('*.png')[0] + Array[File] jsd_qcs = glob('*.jsd.qc') + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. + Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ${sep=' ' select_all(tas)} \ + --ctl-tas ${sep=' ' select_all(ctl_tas)} \ + ${'--ta-pooled ' + ta_pooled} \ + ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ + ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ + ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ + ${'--ctl-depth-limit ' + ctl_depth_limit} \ + ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} + } + output { + File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] + File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] + File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + Float mem_gb = 8.0 + + command { + set -e + python3 $(which encode_task_count_signal_track.py) \ + ${ta} \ + ${'--chrsz ' + chrsz} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pos_bw = glob('*.positive.bigwig')[0] + File neg_bw = glob('*.negative.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? 
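+ # Subsamples a control tagAlign down to a fixed read count, typically the
+ # per-replicate depth chosen by choose_ctl above.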
ta + Boolean paired_end + Int subsample + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + python3 $(which encode_task_subsample_ctl.py) \ + ${ta} \ + ${'--subsample ' + subsample} \ + ${if paired_end then '--paired-end' else ''} \ + } + output { + File ta_subsampled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + + File? blacklist # blacklist BED to filter raw peaks + String? regex_bfilt_peak_chr_name + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + + if [ '${peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + + elif [ '${peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ${sep=' ' select_all(tas)} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--fdr-thresh '+ fdr_thresh} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ${'--ta ' + tas[0]} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--peak-type ' + peak_type} \ + ${'--blacklist ' + blacklist} + } + output { + File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + # generated by post_call_peak py + File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = glob('*.frip.qc')[0] + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : if peak_caller == 'macs2' then 2 else cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. 
control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pval_bw = glob('*.pval.signal.bigwig')[0] + File fc_bw = glob('*.fc.signal.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_idr.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--idr-thresh ' + idr_thresh} \ + ${'--peak-type ' + peak_type} \ + --idr-rank ${rank} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File idr_plot = glob('*.txt.png')[0] + File idr_unthresholded_peak = glob('*.txt.gz')[0] + File idr_log = glob('*.idr*.log')[0] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? 
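+ # Note on the peak-file globs in the idr/overlap outputs below: each [!x]
+ # bracket negates a single character, so '*[!.][!b][!f][!i][!l][!t].' + peak_type + '.gz'
+ # rejects names ending in '.bfilt.<peak_type>.gz' and matches only the
+ # unfiltered peak file.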
fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_overlap.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + --nonamecheck \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_reproducibility.py) \ + ${sep=' ' peaks} \ + --peaks-pr ${sep=' ' peaks_pr} \ + ${'--peak-ppr '+ peak_ppr} \ + --prefix ${prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--chrsz ' + chrsz} + } + output { + File optimal_peak = glob('*optimal_peak.*.gz')[0] + File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] + File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] + File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] + File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] + File conservative_peak = glob('*conservative_peak.*.gz')[0] + File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] + File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] + File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] + File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] + File reproducibility_qc = glob('*reproducibility.qc')[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + + String? 
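+ # Optional Picard JVM heap; when unset the command below derives
+ # round(mem_gb * picard_java_heap_factor) + 'G', e.g. a 10 GB BAM gives
+ # mem_gb = 4.0 + 0.3 * 10 = 7.0 and a '6G' heap (illustrative numbers only).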
picard_java_heap + + RuntimeEnvironment runtime_environment + } + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command { + set -e + python3 $(which encode_task_gc_bias.py) \ + ${'--nodup-bam ' + nodup_bam} \ + ${'--ref-fa ' + ref_fa} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + } + output { + File gc_plot = glob('*.gc_plot.png')[0] + File gc_log = glob('*.gc.txt')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 6 + disks : 'local-disk 250 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + + Array[File] gc_plots + + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + + File? 
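+ # Optional reference qc.json to compare against; when provided, the task writes
+ # qc_json_ref_match.txt and the qc_json_ref_match output reports whether the
+ # freshly generated qc.json agrees with it (presumably ignoring volatile fields).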
qc_json_ref + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ${'--pipeline-ver ' + pipeline_ver} \ + ${"--title '" + sub(title,"'","_") + "'"} \ + ${"--desc '" + sub(description,"'","_") + "'"} \ + ${'--genome ' + genome} \ + ${'--multimapping ' + 0} \ + --paired-ends ${sep=' ' paired_ends} \ + --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ + --pipeline-type ${pipeline_type} \ + --aligner ${aligner} \ + ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ + --peak-caller ${peak_caller} \ + ${'--cap-num-peak ' + cap_num_peak} \ + --idr-thresh ${idr_thresh} \ + --pval-thresh ${pval_thresh} \ + --xcor-trim-bp ${xcor_trim_bp} \ + --xcor-subsample-reads ${xcor_subsample_reads} \ + --samstat-qcs ${sep='_:_' samstat_qcs} \ + --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ + --dup-qcs ${sep='_:_' dup_qcs} \ + --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ + --xcor-plots ${sep='_:_' xcor_plots} \ + --xcor-scores ${sep='_:_' xcor_scores} \ + --idr-plots ${sep='_:_' idr_plots} \ + --idr-plots-pr ${sep='_:_' idr_plots_pr} \ + --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ + ${'--jsd-plot ' + jsd_plot} \ + --jsd-qcs ${sep='_:_' jsd_qcs} \ + ${'--idr-plot-ppr ' + idr_plot_ppr} \ + --frip-qcs ${sep='_:_' frip_qcs} \ + --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ + --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ + ${'--frip-qc-pooled ' + frip_qc_pooled} \ + ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ + ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ + --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ + --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ + ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ + --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ + --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ + ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ + ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ + ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ + --gc-plots ${sep='_:_' gc_plots} \ + --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ + --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ + --num-peak-qcs ${sep='_:_' num_peak_qcs} \ + ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ + ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ + ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ + ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ + ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ + ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ + --out-qc-html qc.html \ + --out-qc-json qc.json \ + ${'--qc-json-ref ' + qc_json_ref} + } + output { + File report = glob('*qc.html')[0] + File qc_json = glob('*qc.json')[0] + Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +### workflow system tasks +task read_genome_tsv { + input { + File? genome_tsv + String? 
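+ # WDL 1.0 has no null literal; leaving this optional String unset lets the
+ # outputs below evaluate to a true null whenever an entry's file is empty
+ # (see the `if size(...)==0 then null_s else read_string(...)` pattern).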
null_s
+
+ RuntimeEnvironment runtime_environment
+ }
+ command <<<
+ echo "$(basename ~{genome_tsv})" > genome_name
+ # create empty files for all entries
+ touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2
+ touch mito_chr_name
+ touch regex_bfilt_peak_chr_name
+
+ python <<CODE
+ # write each tab-separated key/value entry of the genome TSV
+ # to the file named after the key (reconstructed from context)
+ with open('~{genome_tsv}', 'r') as fp:
+     for line in fp:
+         arr = line.rstrip('\n').split('\t')
+         if len(arr) == 2:
+             key, val = arr
+             with open(key, 'w') as fp2:
+                 fp2.write(val)
+ CODE
+ >>>
+ output {
+ String? genome_name = read_string('genome_name')
+ String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa')
+ String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar')
+ String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar')
+ String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz')
+ String? gensz = if size('gensz')==0 then null_s else read_string('gensz')
+ String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist')
+ String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2')
+ String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name')
+ String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+'
+ else read_string('regex_bfilt_peak_chr_name')
+ }
+ runtime {
+ maxRetries : 0
+ cpu : 1
+ memory : '2 GB'
+ time : 4
+ disks : 'local-disk 10 SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
+
+task rounded_mean {
+ input {
+ Array[Int] ints
+
+ RuntimeEnvironment runtime_environment
+ }
+ command <<<
+ python <<CODE
+ # mean of the input ints, rounded to the nearest integer
+ # (0 for an empty array; reconstructed from context)
+ arr = [~{sep=',' ints}]
+ with open('tmp.txt', 'w') as fp:
+     if len(arr):
+         fp.write(str(int(round(sum(arr) / float(len(arr))))))
+     else:
+         fp.write('0')
+ CODE
+ >>>
+ output {
+ Int rounded_mean = read_int('tmp.txt')
+ }
+ runtime {
+ cpu : 1
+ memory : '2 GB'
+ time : 4
+ disks : 'local-disk 10 SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
+
+task raise_exception {
+ input {
+ String msg
+
+ RuntimeEnvironment runtime_environment
+ }
+ command {
+ echo -e "\n* Error: ${msg}\n" >&2
+ exit 2
+ }
+ output {
+ String error_msg = '${msg}'
+ }
+ runtime {
+ maxRetries : 0
+ cpu : 1
+ memory : '2 GB'
+ time : 4
+ disks : 'local-disk 10 SSD'
+
+ docker : runtime_environment.docker
+ singularity : runtime_environment.singularity
+ conda : runtime_environment.conda
+ }
+}
\ No newline at end of file
diff --git a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl
new file mode 100644
index 000000000..281e31443
--- /dev/null
+++ b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl
@@ -0,0 +1,7 @@
+## # Header
+# regular comment will be left as is
+## part of preamble
+
+#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing
+#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput
+version 1.2
diff --git a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl
new file mode 100644
index 000000000..81faa4fa6
--- /dev/null
+++ b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl
@@ -0,0 +1,165 @@
+## # Header
+# regular comment will be left as is
+#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing
+#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput
+
+## part of preamble
+version 1.2
+
+#@ except: MissingMetas
+struct AStruct {
+ String member
+}
+
+task a_task {
+ meta
+ # Here is a comment between `meta` and the parenthesis.
+ {
+ # Here is a comment within `meta`.
+ an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF"
+ a_true: true
+ a_false: false
+ an_integer: 42
+ a_float: -0.0e123
+ an_array: [true, -42, "hello, world"]
+ an_object: {
+ subkey_one: "a",
+ subkey_two: 73,
+ subkey_three: true,
+ subkey_four: false,
+ }
+ an_undefined_value: null
+ }
+
+ parameter_meta
+ # Here is a comment between `parameter_meta` and the parenthesis.
+ {
+ # Here is a comment within `parameter_meta`.
+ an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF"
+ a_true: true
+ a_false: false
+ an_integer: 42
+ a_float: -0.0e123
+ an_array: [true, -42, "hello, world"]
+ an_object: {
+ subkey_one: "a",
+ subkey_two: 73,
+ subkey_three: true,
+ subkey_four: false,
+ }
+ an_undefined_value: null
+ }
+
+ input
+ # Here is a comment before the input.
+ {
+ Object an_object
+ String a_string
+ Boolean a_boolean
+ Int an_integer
+ Float a_float
+ AStruct a_struct # This should not be highlighted, as it's not known within
+ # the TextMate language that it's a custom struct.
+ }
+
+ command <<< >>>
+
+ output
+ # Here is a comment before the output.
+ {
+ Object some_other_object = {}
+ String some_other_string = "foo bar baz"
+ Boolean some_other_boolean = true
+ Int some_other_integer = 42
+ Float some_other_float = 0e3
+ # This should not be highlighted, as it's not known within
+ # the TextMate language that it's a custom struct.
+ AStruct some_other_struct = AStruct {}
+ }
+
+ requirements
+ # This is a comment before the requirements.
+ {
+ container: "ubuntu:latest"
+ }
+
+ hints {
+ max_cpu: 1
+ }
+}
+
+## These double-pound-sign comments
+## should be converted to single-pound-sign comments.
+workflow hello {
+ meta
+ # Here is a comment between `meta` and the parenthesis.
+ {
+ # Here is a comment within `meta`.
+ an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF"
+ a_true: true
+ a_false: false
+ an_integer: 42
+ a_float: -0.0e123
+ an_array: [true, -42, "hello, world"]
+ an_object: {
+ subkey_one: "a",
+ subkey_two: 73,
+ subkey_three: true,
+ subkey_four: false,
+ }
+ an_undefined_value: null
+ }
+
+ parameter_meta
+ # Here is a comment between `parameter_meta` and the parenthesis.
+ {
+ # Here is a comment within `parameter_meta`.
+ an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF"
+ a_true: true
+ a_false: false
+ an_integer: 42
+ a_float: -0.0e123
+ an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment.
+ an_object: {
+ subkey_one: "a",
+ subkey_two: 73,
+ subkey_three: true,
+ subkey_four: false,
+ }
+ an_undefined_value: null
+ }
+
+ input {
+ Object an_object
+ String a_string
+ Boolean a_boolean
+ Int an_integer
+ Float a_float
+ AStruct a_struct # This should not be highlighted, as it's not known within
+ # the TextMate language that it's a custom struct.
+ }
+
+ call a_task {
+ }
+
+ scatter (name in name_array) {
+ call say_task { greeting = greeting }
+ }
+
+ if (some_condition_task) {
+ call a_task as task_two {}
+ }
+
+ output
+ # Here is a comment before the output.
+ {
+ Object some_other_object = {}
+ String some_other_string = "foo bar baz"
+ Boolean some_other_boolean = true
+ Int some_other_integer = 42
+ Float some_other_float = 0e3
+ # This should not be highlighted, as it's not known within
+ # the TextMate language that it's a custom struct.
+ AStruct some_other_struct = AStruct {} + } +} \ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl new file mode 100644 index 000000000..7c8de0324 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -0,0 +1 @@ +version 1.0 diff --git a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl new file mode 100644 index 000000000..7e3333f0a --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl @@ -0,0 +1,106 @@ +version +1.0 +workflow +test_wf +{ +input +{ +SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { +noncanonical_motifs: 30, +GT_AG_and_CT_AC_motif: 12, +} +} +parameter_meta +{ +out_sj_filter_overhang_min: { +type: "SpliceJunctionMotifs", +label: "Minimum overhang required to support a splicing junction" +} +} +output +{ +SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min +String a = "friend" +Int b = 1 + 2 +String c = "Hello, ~{a}" +Map[String, Int] d = { "a": 0, "b": 1, "c": 2} +} +meta { +a: "hello" +b: 'world' +c: 5 +d: -0xf +e: 1.0e10 +f: -2. +g: true +h: false +i: null +j: { +a: [1, 2, 3], +b: ["hello", "world", "!"], +c: { +x: 1, +y: 2, +z: 3 +} +} +k: [ +{ +a: {}, +b: 0, +c: "", +d: '', +e: [], +}, +{ +x: [1.0, 2.0, 3.0] +} +] +} +call no_params +call with_params { input: a, b, c, d = 1 } +call qualified.name +call qualified.name { input: a = 1, b = 2, c = "3" } +call aliased as x +call aliased as x { input: } +call f after x after y +call f after x after y { input: a = [] } +call f as x after x +call f as x after x after y { input: name = "hello" } +call test_task as foo { +input: bowchicka = "wowwow" +} +if ( +true +) { + +call test_task after foo { +input: bowchicka = "bowchicka" +} +scatter (i in range(3)) { +call test_task as bar { +input: bowchicka = i * 42 +} +} +} + +} +task +test_task +{ +command <<<>>> +input { +String bowchicka +} +parameter_meta { +bowchicka: { +type: "String", +label: "Bowchicka" +} +} +} + +struct SpliceJunctionMotifs { +Int noncanonical_motifs +Int GT_AG_and_CT_AC_motif +} diff --git a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl new file mode 100644 index 000000000..6a2d1da70 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl @@ -0,0 +1,23 @@ +version 1.1 + +# fileA 1.1 +import # fileA 1.2 + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" # fileA 2.3 + # fileA 3.1 + as # fileA 3.2 + # fileA 4.1 + bar # fileA 4.2 + # fileA 5.1 + alias # fileA 5.2 + # fileA 6.1 + qux # fileA 6.2 + # fileA 7.1 + as # fileA 7.2 + # fileA 8.1 + Qux # fileA 8.2 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl new file mode 100644 index 000000000..1c32809f6 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# fileA 1.1 +import # fileA 1.2 +# fileA 2.1 
+# fileA 2.2 +"fileA.wdl" # fileA 2.3 +# fileA 3.1 +as # fileA 3.2 +# fileA 4.1 +bar # fileA 4.2 +# fileA 5.1 +alias # fileA 5.2 +# fileA 6.1 +qux # fileA 6.2 +# fileA 7.1 +as # fileA 7.2 +# fileA 8.1 +Qux # fileA 8.2 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl new file mode 100644 index 000000000..e23115af6 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -0,0 +1,12 @@ +version 1.0 + +import # fileA 1 + "fileA.wdl" # fileA 2 + as # fileA 3 + bar # fileA 4 + alias # fileA 5 + qux # fileA 6 + as # fileA 7 + Qux # fileA 8 +import "fileB.wdl" as foo # fileB +import "fileC.wdl" # fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl new file mode 100644 index 000000000..f633e72d8 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl @@ -0,0 +1,12 @@ +version 1.0 +import "fileB.wdl" as foo # fileB +workflow test {} +import "fileC.wdl" # fileC +import # fileA 1 +"fileA.wdl" # fileA 2 +as # fileA 3 +bar # fileA 4 +alias # fileA 5 +qux # fileA 6 +as # fileA 7 +Qux # fileA 8 diff --git a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl new file mode 100644 index 000000000..564a6c05f --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl @@ -0,0 +1,5 @@ +version 1.1 + +import "fileA.wdl" as bar alias cows as horses alias cats as dogs +import "fileB.wdl" as foo +import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl new file mode 100644 index 000000000..e69a1a727 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl @@ -0,0 +1,7 @@ + version 1.1 + + import "fileB.wdl" as foo + import "fileA.wdl" as bar alias cows as horses + alias cats as dogs + workflow test {} + import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl new file mode 100644 index 000000000..8b07048e9 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -0,0 +1,23 @@ +version 1.1 + +# fileA 1 +import + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" + # fileA 3 + as + # fileA 4 + bar + # fileA 5 + alias + # fileA 6 + qux + # fileA 7 + as + # fileA 8 + Qux +# this comment belongs to fileB +import "fileB.wdl" as foo +# this comment belongs to fileC +import "fileC.wdl" diff --git a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl new file mode 100644 index 000000000..a27e7a4fc --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" +# this comment belongs to fileB +import 
"fileB.wdl" as foo +# fileA 1 +import +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" +# fileA 3 +as +# fileA 4 +bar +# fileA 5 +alias +# fileA 6 +qux +# fileA 7 +as +# fileA 8 +Qux diff --git a/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl new file mode 100644 index 000000000..6cd003333 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl @@ -0,0 +1,2 @@ +version # interrupt + 1.2 # how far should '1.2' be indented? diff --git a/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl b/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl new file mode 100644 index 000000000..30e667287 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl @@ -0,0 +1,10 @@ +version # interrupt +1.2 # how far should '1.2' be indented? + +workflow +# interrupt +test # should this be indented? +# interrupt +{ meta # interrupt +{ # how far should this bracket be indented? +}} \ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt b/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt new file mode 100644 index 000000000..335221306 --- /dev/null +++ b/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt @@ -0,0 +1,205 @@ +'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl +on the date 08-05-2024. +It was accompanied by the following license: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file
diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl
new file mode 100644
index 000000000..a25a05879
--- /dev/null
+++ b/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl
@@ -0,0 +1,17 @@
+version 1.0
+
+import "workflows/tasks/bedtools.wdl"
+import "workflows/tasks/bowtie.wdl"
+import "workflows/tasks/fastqc.wdl"
+import "workflows/tasks/macs.wdl"
+import "workflows/tasks/rose.wdl"
+import "workflows/tasks/runspp.wdl"
+import "workflows/tasks/samtools.wdl"
+import "workflows/tasks/seaseq_util.wdl" as util
+import "workflows/tasks/sicer.wdl"
+import "workflows/tasks/sortbed.wdl"
+import "workflows/tasks/sratoolkit.wdl" as sra
+import "workflows/workflows/bamtogff.wdl"
+import "workflows/workflows/mapping.wdl"
+import "workflows/workflows/motifs.wdl"
+import "workflows/workflows/visualization.wdl" as viz
diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl b/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl
new file mode 100644
index 000000000..94c76656e
--- /dev/null
+++ b/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl
@@ -0,0 +1,898 @@
+version 1.0
+import "workflows/tasks/fastqc.wdl"
+import "workflows/tasks/bedtools.wdl"
+import "workflows/tasks/bowtie.wdl"
+import "workflows/tasks/samtools.wdl"
+import "workflows/tasks/macs.wdl"
+import "workflows/workflows/bamtogff.wdl"
+import "workflows/tasks/sicer.wdl"
+import "workflows/workflows/motifs.wdl"
+import "workflows/tasks/rose.wdl"
+import "workflows/tasks/seaseq_util.wdl" as util
+import "workflows/workflows/visualization.wdl" as viz
+import "workflows/workflows/mapping.wdl"
+import "workflows/tasks/runspp.wdl"
+import "workflows/tasks/sortbed.wdl"
+import "workflows/tasks/sratoolkit.wdl" as sra
+
+workflow seaseq {
+ String pipeline_ver = 'v2.0.0'
+
+ meta {
+ title: 'SEAseq Analysis'
+ summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline'
+ description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.'
+ version: '2.0.0'
+ details: {
+ citation: 'https://doi.org/10.1186/s12859-022-04588-z',
+ contactEmail: 'modupeore.adetunji@stjude.org',
+ contactOrg: "St Jude Children's Research Hospital",
+ contactUrl: "",
+ upstreamLicenses: "MIT",
+ upstreamUrl: 'https://github.com/stjude/seaseq',
+ whatsNew: [
+ {
+ version: "2.0",
+ changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"]
+ }
+ ]
+ }
+ parameter_group: {
+ reference_genome: {
+ title: 'Reference genome',
+ description: 'Genome specific files, e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index.',
+ help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.'
+ },
+ input_genomic_data: {
+ title: 'Input FASTQ data',
+ description: 'Genomic input files for experiment.',
+ help: 'Input one or more sample data and/or SRA identifiers.'
+ },
+ analysis_parameter: {
+ title: 'Analysis parameter',
+ description: 'Analysis settings needed for experiment.',
+ help: 'Analysis settings; such as output analysis file name.'
+ }
+ }
+ }
+ input {
+ # group: reference_genome
+ File reference
+ File? spikein_reference
+ File? blacklist
+ File gtf
+ Array[File]? bowtie_index
+ Array[File]? spikein_bowtie_index
+ Array[File]? motif_databases
+
+ # group: input_genomic_data
+ Array[String]?
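+ # At least one of sample_sraid or sample_fastq is expected; the two sources
+ # are later merged via flatten(select_all([...])), so they may also be combined.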
sample_sraid + Array[File]? sample_fastq + + # group: analysis_parameter + String? results_name + Boolean run_motifs=true + + } + + parameter_meta { + reference: { + description: 'Reference FASTA file', + group: 'reference_genome', + patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] + } + blacklist: { + description: 'Blacklist file in BED format', + group: 'reference_genome', + help: 'If defined, blacklist regions listed are excluded after reference alignment.', + patterns: ["*.bed", "*.bed.gz"] + } + gtf: { + description: 'gene annotation file (.gtf)', + group: 'reference_genome', + help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', + patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] + } + bowtie_index: { + description: 'bowtie v1 index files (*.ebwt)', + group: 'reference_genome', + help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', + patterns: ["*.ebwt"] + } + motif_databases: { + description: 'One or more of the MEME suite motif databases (*.meme)', + group: 'reference_genome', + help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', + patterns: ["*.meme"] + } + sample_sraid: { + description: 'One or more sample SRA (Sequence Read Archive) run identifiers', + group: 'input_genomic_data', + help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', + example: 'SRR12345678' + } + sample_fastq: { + description: 'One or more sample FASTQs', + group: 'input_genomic_data', + help: 'Upload zipped FASTQ files.', + patterns: ["*.fq.gz", "*.fastq.gz"] + } + results_name: { + description: 'Experiment results custom name', + group: 'analysis_parameter', + help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', + example: 'AllMerge_mapped' + } + run_motifs: { + description: 'Perform Motif Analysis', + group: 'analysis_parameter', + help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', + example: true + } + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 1 ----------- ### +### ------ Pre-process Analysis Files ------ ### +### ---------------------------------------- ### + + # Process SRRs + if ( defined(sample_sraid) ) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [1] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([sample_sraid, string_sra]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { + input : + sra_id=eachsra, + cloud=false + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if ( !defined(bowtie_index) ) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { + input : + reference=reference + } + } + #2. 
Make sure indexes are six else build indexes + if ( defined(bowtie_index) ) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) + if ( length(int_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { + input : + reference=reference + } + } + } + Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [1] + if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + + #4. Make sure indexes are six else build indexes for Spike-in DNA + if ( defined(spikein_bowtie_index) ) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) + if ( length(int_spikein_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + } + Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input : + reference=reference + } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input : + reference=reference + } + + # Process FASTQs + if ( defined(sample_fastq) ) { + + Array[String] string_fastq = [1] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([sample_fastq, string_fastq]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 1 ---------------- ### +### ----------- B: remove Spike-IN reads ------------ ### +### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + + if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { + input : + inputfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' + } + call util.basicfastqstats as spikein_indv_bfs { + input : + fastqfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + call bowtie.spikein_SE as spikein_indv_map { + input : + fastqfile=eachfastq, + index_files=actual_spikein_bowtie_index, + 
metricsfile=spikein_indv_bfs.metrics_out, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 2 ---------------- ### +### ---- A: analysis if multiple FASTQs provided ---- ### +### ------------------------------------------------- ### + + if ( multi_fastq ) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { + input : + inputfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as indv_bfs { + input : + fastqfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping as indv_mapping { + input : + fastqfile=eachfastq, + index_files=actual_bowtie_index, + metricsfile=indv_bfs.metrics_out, + blacklist=blacklist, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as indv_bamfqc { + input : + inputfile=indv_mapping.sorted_bam, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as indv_runspp { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call bedtools.bamtobed as indv_bamtobed { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call util.evalstats as indv_summarystats { + input: + fastq_type="SEAseq Sample FASTQ", + bambed=indv_bamtobed.bedfile, + sppfile=indv_runspp.spp_out, + fastqczip=indv_fastqc.zipfile, + bamflag=indv_mapping.bam_stats, + rmdupflag=indv_mapping.mkdup_stats, + bkflag=indv_mapping.bklist_stats, + fastqmetrics=indv_bfs.metrics_out, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { + input: + htmlfiles=indv_summarystats.xhtml, + txtfiles=indv_summarystats.textfile, + default_location='SAMPLE', + outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' + } + + call samtools.mergebam { + input: + bamfiles=indv_mapping.sorted_bam, + metricsfiles=indv_bfs.metrics_out, + default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', + outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' + } + + call fastqc.fastqc as mergebamfqc { + input: + 
inputfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' + } + + call samtools.indexstats as mergeindexstats { + input: + bamfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + + if ( defined(blacklist) ) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([blacklist, string_blacklist]) + call bedtools.intersect as merge_rmblklist { + input : + fileA=mergebam.mergebam, + fileB=blacklist_file, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', + nooverlap=true + } + call samtools.indexstats as merge_bklist { + input : + bamfile=merge_rmblklist.intersect_out, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) + + call samtools.markdup as merge_markdup { + input : + bamfile=mergebam_afterbklist, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + + call samtools.indexstats as merge_mkdup { + input : + bamfile=merge_markdup.mkdupbam, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + } # end if length(fastqfiles) > 1: multi_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 2 ----------- ### +### -- B: analysis if one FASTQ provided --- ### +### ---------------------------------------- ### + + # if only one fastqfile is provided + if ( one_fastq ) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { + input : + inputfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as uno_bfs { + input : + fastqfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping { + input : + fastqfile=fastqfiles[0], + index_files=actual_bowtie_index, + metricsfile=uno_bfs.metrics_out, + blacklist=blacklist, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as uno_bamfqc { + input : + inputfile=mapping.sorted_bam, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as uno_runspp { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + + call bedtools.bamtobed as uno_bamtobed { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + } # end if length(fastqfiles) == 1: one_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 3 ----------- ### +### ----------- ChIP-seq analysis ---------- ### +### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate 
correct files for downstream analysis + File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) + + call macs.macs { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="auto", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' + } + + call util.addreadme { + input : + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' + } + + call macs.macs as all { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="all", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' + } + + call macs.macs as nomodel { + input : + bamfile=sample_bam, + nomodel=true, + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' + } + + call bamtogff.bamtogff { + input : + gtffile=gtf, + chromsizes=samtools_faidx.chromsizes, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' + } + + call bedtools.bamtobed as forsicerbed { + input : + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) + } + + call sicer.sicer { + input : + bedfile=forsicerbed.bedfile, + chromsizes=samtools_faidx.chromsizes, + genome_fraction=egs.genomefraction, + fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call rose.rose { + input : + gtffile=gtf, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + bedfile_auto=macs.peakbedfile, + bedfile_all=all.peakbedfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' + } + + call runspp.runspp { + input: + bamfile=sample_bam + } + + call util.peaksanno { + input : + gtffile=gtf, + bedfile=macs.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as all_peaksanno { + input : + gtffile=gtf, + bedfile=all.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=all.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as nomodel_peaksanno { + input : + gtffile=gtf, + bedfile=nomodel.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=nomodel.summitsfile, + 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as sicer_peaksanno { + input : + gtffile=gtf, + bedfile=sicer.scoreisland, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=macs.peakbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call util.flankbed { + input : + bedfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call motifs.motifs as flank { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=flankbed.flankbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + } + + call viz.visualization { + input: + wigfile=macs.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=macs.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizall { + input: + wigfile=all.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=all.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as viznomodel { + input: + wigfile=nomodel.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=nomodel.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizsicer { + input: + wigfile=sicer.wigfile, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call bedtools.bamtobed as finalbed { + input: + bamfile=sample_bam + } + + call sortbed.sortbed { + input: + bedfile=finalbed.bedfile + } + + call bedtools.intersect { + input: + fileA=macs.peakbedfile, + fileB=sortbed.sortbed_out, + countoverlap=true, + sorted=true + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 4 ----------- ### +### ---------- Summary Statistics ---------- ### +### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if ( one_fastq ) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type="SEAseq Sample FASTQ", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), + bamflag=mapping.bam_stats, + rmdupflag=mapping.mkdup_stats, + bkflag=mapping.bklist_stats, + fastqmetrics=uno_bfs.metrics_out, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + 
overallqc_html=uno_summarystats.xhtml, + overallqc_txt=uno_summarystats.textfile + } + } # end if one_fastq + + if ( multi_fastq ) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type="SEAseq Comprehensive", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([mergebamfqc.zipfile, string_qual]), + bamflag=mergeindexstats.flagstats, + rmdupflag=merge_mkdup.flagstats, + bkflag=merge_bklist.flagstats, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html=mergehtml.xhtml, + overallqc_html=merge_summarystats.xhtml, + sampleqc_txt=mergehtml.mergetxt, + overallqc_txt=merge_summarystats.textfile + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? 
scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? g_to_e_enhancers = rose.g_to_e_enhancers + File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? 
a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + + File? s_uno_statsfile = uno_summarystats.statsfile + File? s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + + File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) + File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) + } +} \ No newline at end of file diff --git a/backup/wdl-format-old/CHANGELOG.md b/backup/wdl-format-old/CHANGELOG.md new file mode 100644 index 000000000..3eeeadd81 --- /dev/null +++ b/backup/wdl-format-old/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added + +* Added the `wdl-format` crate for formatting WDL documents ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)). diff --git a/backup/wdl-format-old/Cargo.toml b/backup/wdl-format-old/Cargo.toml new file mode 100644 index 000000000..eec06496c --- /dev/null +++ b/backup/wdl-format-old/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "wdl-format" +version = "0.1.0" +license.workspace = true +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true + +[dependencies] +anyhow.workspace = true +wdl-ast = { path = "../wdl-ast", version = "0.5.0" } +wdl-grammar = { version = "0.6.0", path = "../wdl-grammar" } + +[dev-dependencies] +pretty_assertions = { workspace = true } +approx = { workspace = true } +rayon = { workspace = true } +colored = { workspace = true } +codespan-reporting = { workspace = true } + +[features] +codespan = ["wdl-ast/codespan"] + +[[test]] +name = "format" +required-features = ["codespan"] +harness = false diff --git a/backup/wdl-format-old/src/formatter.rs b/backup/wdl-format-old/src/formatter.rs new file mode 100644 index 000000000..6ef467b44 --- /dev/null +++ b/backup/wdl-format-old/src/formatter.rs @@ -0,0 +1,131 @@ +//! Contains the `Formatter` struct, which is used to keep track of the +//! current formatting state. This includes the current indentation level and +//! whether the current line has been interrupted by comments. +//! The state becomes "interrupted" by comments when a comment forces a newline +//! where it would otherwise not be expected. In this case, the next line(s) +//! will be indented by one level. + +use crate::Formattable; +use crate::NEWLINE; + +/// Space constant used for formatting. +pub const SPACE: &str = " "; +/// Indentation constant used for formatting. Indentation is four spaces +/// per-level. +pub const INDENT: &str = " "; +/// Inline comment space constant used for formatting. +/// +/// Inline comments should start two spaces after the end of the element they +/// are commenting on. 
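+///
+/// For example (illustrative): `Int x = 1  # an inline comment`.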
+pub const INLINE_COMMENT_SPACE: &str = "  ";
+
+/// The `Formatter` struct is used to keep track of the current formatting
+/// state. This includes the current indentation level and whether the current
+/// line has been interrupted by comments.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct Formatter {
+    /// The current indentation level.
+    indent_level: usize,
+    /// Whether the current line has been interrupted by comments.
+    interrupted_by_comments: bool,
+}
+
+impl Formatter {
+    /// Format an element.
+    pub fn format<T: Formattable, F: std::fmt::Write>(
+        mut self,
+        element: &T,
+        writer: &mut F,
+    ) -> std::fmt::Result {
+        element.format(writer, &mut self)
+    }
+
+    /// Add the current indentation to the writer.
+    /// The indentation level will be temporarily increased by one if the
+    /// current line has been interrupted by comments.
+    pub fn indent<T: std::fmt::Write>(&self, writer: &mut T) -> std::fmt::Result {
+        write!(
+            writer,
+            "{}",
+            INDENT.repeat(self.indent_level + (if self.interrupted_by_comments { 1 } else { 0 }))
+        )
+    }
+
+    /// Add a space or an indentation to the writer. If the current line has
+    /// been interrupted by comments, an indentation is added. Otherwise, a
+    /// space is added.
+    pub fn space_or_indent<T: std::fmt::Write>(&mut self, writer: &mut T) -> std::fmt::Result {
+        if !self.interrupted_by_comments {
+            write!(writer, "{}", SPACE)?;
+        } else {
+            self.indent(writer)?;
+        }
+        self.reset_interrupted();
+        Ok(())
+    }
+
+    /// Add a level of indentation.
+    pub fn increment_indent(&mut self) {
+        self.indent_level += 1;
+        self.reset_interrupted();
+    }
+
+    /// Remove a level of indentation.
+    pub fn decrement_indent(&mut self) {
+        self.indent_level = self.indent_level.saturating_sub(1);
+        self.reset_interrupted();
+    }
+
+    /// Check if the current line has been interrupted by comments.
+    pub fn interrupted(&self) -> bool {
+        self.interrupted_by_comments
+    }
+
+    /// Interrupt the current line with comments.
+    pub fn interrupt(&mut self) {
+        self.interrupted_by_comments = true;
+    }
+
+    /// Reset the interrupted state.
+    pub fn reset_interrupted(&mut self) {
+        self.interrupted_by_comments = false;
+    }
+
+    /// Write any comments that precede an element. If the comments would
+    /// interrupt an in-progress line, a newline is first written and the
+    /// formatter is marked as interrupted.
+    pub fn format_preceding_trivia<F: std::fmt::Write>(
+        &mut self,
+        writer: &mut F,
+        comments: Box<[String]>,
+        would_be_interrupting: bool,
+        respect_blank_lines: bool,
+    ) -> std::fmt::Result {
+        if would_be_interrupting && !comments.is_empty() && !self.interrupted_by_comments {
+            write!(writer, "{}", NEWLINE)?;
+            self.interrupt();
+        }
+        for comment in comments {
+            if !respect_blank_lines && !comment.starts_with('#') {
+                continue;
+            }
+            self.indent(writer)?;
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+        Ok(())
+    }
+
+    /// Write an element's inline comment, if present. When the comment would
+    /// be interrupting, the formatter is marked as interrupted; when it would
+    /// not be interrupting and there is no comment, a terminating newline is
+    /// written.
+    pub fn format_inline_comment<F: std::fmt::Write>(
+        &mut self,
+        writer: &mut F,
+        comment: Option<String>,
+        would_be_interrupting: bool,
+    ) -> std::fmt::Result {
+        if let Some(ref comment) = comment {
+            write!(writer, "{}{}{}", INLINE_COMMENT_SPACE, comment, NEWLINE)?;
+        }
+        if would_be_interrupting && comment.is_some() {
+            self.interrupt();
+        } else if !would_be_interrupting && comment.is_none() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+        Ok(())
+    }
+}
diff --git a/backup/wdl-format-old/src/import.rs b/backup/wdl-format-old/src/import.rs
new file mode 100644
index 000000000..5cc1cb96b
--- /dev/null
+++ b/backup/wdl-format-old/src/import.rs
@@ -0,0 +1,174 @@
+//! Format import statements.
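+//!
+//! Imports are written one per line and sorted by URI and then namespace (see
+//! `sort_imports` below); illustrative output:
+//!
+//! ```text
+//! import "workflows/tasks/bedtools.wdl"
+//! import "workflows/tasks/bowtie.wdl"
+//! import "workflows/tasks/seaseq_util.wdl" as util
+//! ```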
+
+use wdl_ast::token_child;
+use wdl_ast::v1::AliasKeyword;
+use wdl_ast::v1::AsKeyword;
+use wdl_ast::v1::ImportAlias;
+use wdl_ast::v1::ImportKeyword;
+use wdl_ast::v1::ImportStatement;
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::Ident;
+use wdl_ast::SyntaxElement;
+use wdl_ast::SyntaxKind;
+use wdl_grammar::SyntaxExt;
+
+use crate::Formattable;
+use crate::Formatter;
+
+impl Formattable for ImportKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for AsKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for AliasKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for ImportAlias {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?;
+
+        let alias_keyword = self.alias_keyword();
+        formatter.space_or_indent(writer)?;
+        alias_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, alias_keyword.syntax().inline_comment(), true)?;
+
+        let (source, target) = self.names();
+
+        formatter.format_preceding_trivia(
+            writer,
+            source.syntax().preceding_trivia(),
+            true,
+            false,
+        )?;
+        formatter.space_or_indent(writer)?;
+        source.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, source.syntax().inline_comment(), true)?;
+
+        let as_keyword = self.as_keyword();
+        formatter.format_preceding_trivia(
+            writer,
+            as_keyword.syntax().preceding_trivia(),
+            true,
+            false,
+        )?;
+        formatter.space_or_indent(writer)?;
+        as_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?;
+
+        formatter.format_preceding_trivia(
+            writer,
+            target.syntax().preceding_trivia(),
+            true,
+            false,
+        )?;
+        formatter.space_or_indent(writer)?;
+        target.format(writer, formatter)?;
+
+        formatter.format_inline_comment(writer, self.syntax().inline_comment(), true)
+    }
+}
+
+impl Formattable for ImportStatement {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        formatter.format_preceding_trivia(
+            writer,
+            self.syntax().preceding_trivia(),
+            false,
+            false,
+        )?;
+
+        let import_keyword = self.keyword();
+        import_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, import_keyword.syntax().inline_comment(), true)?;
+
+        let uri = self.uri();
+        formatter.format_preceding_trivia(writer, uri.syntax().preceding_trivia(), true, false)?;
+        formatter.space_or_indent(writer)?;
+        uri.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, uri.syntax().inline_comment(), true)?;
+
+        let as_keyword = token_child::<AsKeyword>(self.syntax());
+        if let Some(as_keyword) = as_keyword {
+            formatter.format_preceding_trivia(
+                writer,
+                as_keyword.syntax().preceding_trivia(),
+                true,
+                false,
+            )?;
+            formatter.space_or_indent(writer)?;
+            as_keyword.format(writer, formatter)?;
+            formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?;
+
+            let ident = self
+                .explicit_namespace()
+                .expect("import with as clause should have an explicit namespace");
+            formatter.format_preceding_trivia(
+                writer,
+                ident.syntax().preceding_trivia(),
+                true,
+                false,
+            )?;
+            formatter.space_or_indent(writer)?;
+            ident.format(writer, formatter)?;
+            formatter.format_inline_comment(writer, ident.syntax().inline_comment(), true)?;
+        }
+
+        for alias in self.aliases() {
+            alias.format(writer, formatter)?;
+        }
+
+        formatter.format_inline_comment(writer, self.syntax().inline_comment(), false)
+    }
+}
+
+/// Sorts import statements by their core components.
+///
+/// The core components of an import statement are the URI and the namespace.
+/// These two elements guarantee a unique import statement.
+pub fn sort_imports(a: &ImportStatement, b: &ImportStatement) -> std::cmp::Ordering {
+    (
+        a.uri()
+            .text()
+            .expect("import URI cannot have placeholders")
+            .as_str(),
+        &a.namespace().expect("import namespace should exist").0,
+    )
+        .cmp(&(
+            b.uri()
+                .text()
+                .expect("import URI cannot have placeholders")
+                .as_str(),
+            &b.namespace().expect("import namespace should exist").0,
+        ))
+}
diff --git a/backup/wdl-format-old/src/lib.rs b/backup/wdl-format-old/src/lib.rs
new file mode 100644
index 000000000..283a2c77f
--- /dev/null
+++ b/backup/wdl-format-old/src/lib.rs
@@ -0,0 +1,190 @@
+//! A library for auto-formatting WDL code.
+
+#![warn(missing_docs)]
+#![warn(rust_2018_idioms)]
+#![warn(rust_2021_compatibility)]
+#![warn(missing_debug_implementations)]
+#![warn(clippy::missing_docs_in_private_items)]
+#![warn(rustdoc::broken_intra_doc_links)]
+
+use anyhow::Result;
+use wdl_ast::token_child;
+use wdl_ast::v1::VersionKeyword;
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::Diagnostic;
+use wdl_ast::Document;
+use wdl_ast::Ident;
+use wdl_ast::SyntaxElement;
+use wdl_ast::SyntaxKind;
+use wdl_ast::SyntaxNode;
+use wdl_ast::Validator;
+use wdl_ast::Version;
+use wdl_ast::VersionStatement;
+use wdl_grammar::SyntaxExt;
+
+mod formatter;
+mod import;
+// mod metadata;
+mod task;
+mod v1;
+mod workflow;
+
+use formatter::Formatter;
+
+/// Newline constant used for formatting on Windows platforms.
+#[cfg(windows)]
+pub const NEWLINE: &str = "\r\n";
+/// Newline constant used for formatting on non-Windows platforms.
+#[cfg(not(windows))]
+pub const NEWLINE: &str = "\n";
+/// String terminator constant used for formatting.
+const STRING_TERMINATOR: char = '"';
+/// Lint directive prefix constant used for formatting.
+const LINT_DIRECTIVE_PREFIX: &str = "#@";
+
+/// A trait for elements that can be formatted.
+pub trait Formattable {
+    /// Format the element and write it to the writer.
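+    ///
+    /// A minimal usage sketch (assuming a `String` buffer as the writer and a
+    /// default `Formatter`):
+    ///
+    /// ```ignore
+    /// let mut buffer = String::new();
+    /// let mut formatter = Formatter::default();
+    /// element.format(&mut buffer, &mut formatter)?;
+    /// ```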
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result;
+}
+
+impl Formattable for VersionKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for Version {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for VersionStatement {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        let mut preamble_comments = Vec::new();
+        let mut lint_directives = Vec::new();
+
+        for comment in self.syntax().preceding_trivia() {
+            if comment.starts_with(LINT_DIRECTIVE_PREFIX) {
+                lint_directives.push(comment);
+            } else if comment.starts_with('#') {
+                preamble_comments.push(comment);
+            } // else is just a newline
+        }
+
+        for comment in preamble_comments.iter() {
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+
+        // If there are preamble comments, ensure a blank line is inserted
+        if !preamble_comments.is_empty() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+
+        for comment in lint_directives.iter() {
+            write!(writer, "{}{}", comment, NEWLINE)?;
+        }
+
+        let version_keyword = self.keyword();
+        version_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, version_keyword.syntax().inline_comment(), true)?;
+
+        let version = self.version();
+        formatter.format_preceding_trivia(
+            writer,
+            version.syntax().preceding_trivia(),
+            true,
+            false,
+        )?;
+        formatter.space_or_indent(writer)?;
+        version.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, self.syntax().inline_comment(), false)
+    }
+}
+
+impl Formattable for Ident {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for Document {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        let ast = self.ast();
+        let ast = ast.as_v1().expect("document should be a v1 document");
+        let version_statement = self
+            .version_statement()
+            .expect("document should have a version statement");
+        version_statement.format(writer, formatter)?;
+        let mut imports = ast.imports().collect::<Vec<_>>();
+        if !imports.is_empty() {
+            write!(writer, "{}", NEWLINE)?;
+        }
+        imports.sort_by(import::sort_imports);
+        for import in imports {
+            import.format(writer, formatter)?;
+        }
+        for item in ast.items() {
+            if item.syntax().kind() == SyntaxKind::ImportStatementNode {
+                continue;
+            }
+            // write!(writer, "{}", NEWLINE)?;
+            // item.format(writer, formatter)?;
+        }
+        Ok(())
+    }
+}
+
+/// Format a WDL document.
+pub fn format_document(code: &str) -> Result<String, Vec<Diagnostic>> {
+    let (document, diagnostics) = Document::parse(code);
+    if !diagnostics.is_empty() {
+        return Err(diagnostics);
+    }
+    let mut validator = Validator::default();
+    match validator.validate(&document) {
+        std::result::Result::Ok(_) => {
+            // The document is valid, so we can format it.
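+            // Invalid documents are never formatted; their validation
+            // diagnostics are returned to the caller unchanged.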
+ } + Err(diagnostics) => return Err(diagnostics), + } + + let mut result = String::new(); + let formatter = &mut Formatter::default(); + + match formatter.format(&document, &mut result) { + Ok(_) => {} + Err(error) => { + let msg = format!("Failed to format document: {}", error); + return Err(vec![Diagnostic::error(msg)]); + } + } + + Ok(result) +} diff --git a/backup/wdl-format-old/src/metadata.rs b/backup/wdl-format-old/src/metadata.rs new file mode 100644 index 000000000..9bb557f29 --- /dev/null +++ b/backup/wdl-format-old/src/metadata.rs @@ -0,0 +1,365 @@ +//! A module for formatting metadata sections (meta and parameter_meta). + +use wdl_ast::v1::LiteralNull; +use wdl_ast::v1::MetadataArray; +use wdl_ast::v1::MetadataObject; +use wdl_ast::v1::MetadataObjectItem; +use wdl_ast::v1::MetadataSection; +use wdl_ast::v1::MetadataValue; +use wdl_ast::v1::ParameterMetadataSection; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; + +use super::comments::format_inline_comment; +use super::comments::format_preceding_comments; +use super::first_child_of_kind; +use super::format_element_with_comments; +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::LinePosition; +use super::NEWLINE; + +impl Formattable for LiteralNull { + fn format( + &self, + writer: &mut T, + _state: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.syntax()) + } +} + +impl Formattable for MetadataObject { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + format_element_with_comments( + &open_brace, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + let mut commas = self + .syntax() + .children_with_tokens() + .filter(|c| c.kind() == SyntaxKind::Comma); + + for item in self.items() { + item.format(writer, formatter)?; + if let Some(cur_comma) = commas.next() { + format_element_with_comments( + &cur_comma, + writer, + formatter, + LinePosition::End, + |_, _| Ok(()), + )?; + } else { + // No trailing comma was in the input + write!(writer, ",")?; + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + format_preceding_comments(&close_brace, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_brace)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataArray { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let open_bracket = first_child_of_kind(self.syntax(), SyntaxKind::OpenBracket); + format_element_with_comments( + &open_bracket, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + let mut commas = self + .syntax() + .children_with_tokens() + .filter(|c| 
c.kind() == SyntaxKind::Comma); + + for item in self.elements() { + formatter.indent(writer)?; + item.format(writer, formatter)?; + if let Some(cur_comma) = commas.next() { + format_element_with_comments( + &cur_comma, + writer, + formatter, + LinePosition::End, + |_, _| Ok(()), + )?; + } else { + // No trailing comma was in the input + write!(writer, ",")?; + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_bracket = first_child_of_kind(self.syntax(), SyntaxKind::CloseBracket); + format_preceding_comments(&close_bracket, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_bracket)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataValue { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + match self { + MetadataValue::String(s) => s.format(writer, formatter), + MetadataValue::Boolean(b) => b.format(writer, formatter), + MetadataValue::Float(f) => f.format(writer, formatter), + MetadataValue::Integer(i) => i.format(writer, formatter), + MetadataValue::Null(n) => n.format(writer, formatter), + MetadataValue::Object(o) => o.format(writer, formatter), + MetadataValue::Array(a) => a.format(writer, formatter), + } + } +} + +impl Formattable for MetadataObjectItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let name = self.name(); + formatter.indent(writer)?; + name.format(writer, formatter)?; + format_inline_comment( + &SyntaxElement::from(name.syntax().clone()), + writer, + formatter, + true, + )?; + + let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + format_element_with_comments( + &colon, + writer, + formatter, + LinePosition::Middle, + |writer, formatter| { + if formatter.interrupted() { + formatter.indent(writer)?; + formatter.reset_interrupted(); + } + Ok(()) + }, + )?; + + let value = self.value(); + format_preceding_comments( + &SyntaxElement::from(value.syntax().clone()), + writer, + formatter, + true, + )?; + formatter.space_or_indent(writer)?; + value.format(writer, formatter)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + true, + ) + } +} + +impl Formattable for MetadataSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let meta_keyword = first_child_of_kind(self.syntax(), SyntaxKind::MetaKeyword); + formatter.indent(writer)?; + write!(writer, "{}", meta_keyword)?; + format_inline_comment(&meta_keyword, writer, formatter, true)?; + + let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + format_element_with_comments( + &open_brace, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } else { + write!(writer, "{}", SPACE)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + for item in self.items() { + item.format(writer, formatter)?; + if formatter.interrupted() { + formatter.reset_interrupted(); + } else { + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_brace = 
first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + format_preceding_comments(&close_brace, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_brace)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + ) + } +} + +impl Formattable for ParameterMetadataSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + format_preceding_comments( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + )?; + + let parameter_meta_keyword = + first_child_of_kind(self.syntax(), SyntaxKind::ParameterMetaKeyword); + formatter.indent(writer)?; + write!(writer, "{}", parameter_meta_keyword)?; + format_inline_comment(¶meter_meta_keyword, writer, formatter, true)?; + + let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + format_element_with_comments( + &open_brace, + writer, + formatter, + LinePosition::End, + |writer, formatter| { + if formatter.interrupted() { + formatter.reset_interrupted(); + formatter.indent(writer)?; + } else { + write!(writer, "{}", SPACE)?; + } + Ok(()) + }, + )?; + + formatter.increment_indent(); + + for item in self.items() { + item.format(writer, formatter)?; + if formatter.interrupted() { + formatter.reset_interrupted(); + } else { + write!(writer, "{}", NEWLINE)?; + } + } + + formatter.decrement_indent(); + + let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + format_preceding_comments(&close_brace, writer, formatter, false)?; + formatter.indent(writer)?; + write!(writer, "{}", close_brace)?; + format_inline_comment( + &SyntaxElement::from(self.syntax().clone()), + writer, + formatter, + false, + ) + } +} diff --git a/backup/wdl-format-old/src/task.rs b/backup/wdl-format-old/src/task.rs new file mode 100644 index 000000000..6e01b8843 --- /dev/null +++ b/backup/wdl-format-old/src/task.rs @@ -0,0 +1,455 @@ +//! A module for formatting elements in tasks. 
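+//!
+//! Note: most implementations in this module are currently stubbed out: their
+//! `format` methods return `Ok(())`, with the previous formatting logic
+//! preserved in the commented-out bodies below.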
+ +use wdl_ast::v1::CommandPart; +use wdl_ast::v1::CommandSection; +use wdl_ast::v1::CommandText; +use wdl_ast::v1::Decl; +use wdl_ast::v1::RequirementsItem; +use wdl_ast::v1::RequirementsSection; +use wdl_ast::v1::RuntimeItem; +use wdl_ast::v1::RuntimeSection; +use wdl_ast::v1::TaskDefinition; +use wdl_ast::v1::TaskItem; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::SyntaxElement; +use wdl_ast::SyntaxKind; + +use super::formatter::SPACE; +use super::Formattable; +use super::Formatter; +use super::NEWLINE; + +impl Formattable for CommandText { + fn format( + &self, + writer: &mut T, + _formatter: &mut Formatter, + ) -> std::fmt::Result { + write!(writer, "{}", self.as_str()) + } +} + +impl Formattable for CommandSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let command_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::CommandKeyword); formatter.indent(writer)?; + // write!(writer, "{}", command_keyword)?; + // format_inline_comment(&command_keyword, writer, formatter, true)?; + + // // coerce all command sections to use heredoc ('<<<>>>>') syntax + // // (as opposed to bracket ('{}') syntax) + // let open_section = if self.is_heredoc() { + // first_child_of_kind(self.syntax(), SyntaxKind::OpenHeredoc) + // } else { + // first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace) + // }; + // format_preceding_comments(&open_section, writer, formatter, true)?; + + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "<<<")?; + + // for part in self.parts() { + // match part { + // CommandPart::Text(t) => { + // t.format(writer, formatter)?; + // } + // CommandPart::Placeholder(p) => { + // p.format(writer, formatter)?; + // } + // } + // } + + // write!(writer, ">>>")?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RuntimeItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let name = self.name(); + // formatter.indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + // format_preceding_comments(&colon, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } + // write!(writer, "{}", colon)?; + // format_inline_comment(&colon, writer, formatter, true)?; + + // let expr = self.expr(); + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RuntimeSection { + fn 
format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let runtime_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::RuntimeKeyword); formatter.indent(writer)?; + // write!(writer, "{}", runtime_keyword)?; + // format_inline_comment(&runtime_keyword, writer, formatter, true)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for item in self.items() { + // item.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, true)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RequirementsItem { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let name = self.name(); + // formatter.indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + // format_preceding_comments(&colon, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } + // write!(writer, "{}", colon)?; + // format_inline_comment(&colon, writer, formatter, true)?; + + // let expr = self.expr(); + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for RequirementsSection { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let requirements_keyword = + // first_child_of_kind(self.syntax(), SyntaxKind::RequirementsKeyword); + // formatter.indent(writer)?; + // write!(writer, "{}", requirements_keyword)?; + // format_inline_comment(&requirements_keyword, writer, formatter, true)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
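+        // // (Here, "ignoring" the rule means `reset_interrupted()` is called
+        // // before `indent()`, so the brace is written at the block's own
+        // // level rather than one level deeper.)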
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for item in self.items() { + // item.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, true)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for TaskDefinition { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let task_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::TaskKeyword); formatter.indent(writer)?; + // write!(writer, "{}", task_keyword)?; + // format_inline_comment(&task_keyword, writer, formatter, true)?; + + // let name = self.name(); + // format_preceding_comments( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // let mut meta_section_str = String::new(); + // let mut parameter_meta_section_str = String::new(); + // let mut input_section_str = String::new(); + // let mut declaration_section_str = String::new(); + // let mut command_section_str = String::new(); + // let mut output_section_str = String::new(); + // let mut runtime_section_str = String::new(); + // let mut hints_section_str = String::new(); + // let mut requirements_section_str = String::new(); + + // for item in self.items() { + // match item { + // TaskItem::Metadata(m) => { + // m.format(&mut meta_section_str, formatter)?; + // } + // TaskItem::ParameterMetadata(pm) => { + // pm.format(&mut parameter_meta_section_str, formatter)?; + // } + // TaskItem::Input(i) => { + // i.format(&mut input_section_str, formatter)?; + // } + // TaskItem::Declaration(d) => { + // Decl::Bound(d).format(&mut declaration_section_str, formatter)?; + // } + // TaskItem::Command(c) => { + // c.format(&mut command_section_str, formatter)?; + // } + // TaskItem::Output(o) => { + // o.format(&mut output_section_str, formatter)?; + // } + // TaskItem::Runtime(r) => { + // r.format(&mut runtime_section_str, formatter)?; + // } + // TaskItem::Hints(h) => { + // h.format(&mut hints_section_str, formatter)?; + // } + // TaskItem::Requirements(r) => { + // r.format(&mut requirements_section_str, formatter)?; + // } + // } + // } + + // let mut first_section = true; + + // if !meta_section_str.is_empty() { + // first_section = false; + // write!(writer, "{}", meta_section_str)?; + // } + // if !parameter_meta_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", parameter_meta_section_str)?; + // } + // if !input_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", input_section_str)?; + // } + // if !declaration_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // first_section = false; + // write!(writer, "{}", declaration_section_str)?; + // } + // // Command section is required + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", command_section_str)?; + // if !output_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", output_section_str)?; + // } + // if !runtime_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", runtime_section_str)?; + // } + // if !hints_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", hints_section_str)?; + // } + // if !requirements_section_str.is_empty() { + // write!(writer, "{}", NEWLINE)?; + // write!(writer, "{}", requirements_section_str)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, true)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} 
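+
+// NOTE: the commented-out `TaskDefinition` implementation above renders each
+// task section into its own `String` buffer and then writes the non-empty
+// buffers out in a fixed canonical order, separated by blank lines. A minimal
+// sketch of that buffering pattern, assuming only `std` (the helper name is
+// illustrative and not part of the old API):
+#[allow(dead_code)]
+fn write_sections_in_order<T: std::fmt::Write>(
+    writer: &mut T,
+    sections: &[&str],
+) -> std::fmt::Result {
+    let mut first = true;
+    for section in sections.iter().filter(|s| !s.is_empty()) {
+        if !first {
+            // A single blank line separates consecutive sections.
+            writer.write_str("\n")?;
+        }
+        first = false;
+        writer.write_str(section)?;
+    }
+    Ok(())
+}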
diff --git a/backup/wdl-format-old/src/v1.rs b/backup/wdl-format-old/src/v1.rs
new file mode 100644
index 000000000..561a58ebc
--- /dev/null
+++ b/backup/wdl-format-old/src/v1.rs
@@ -0,0 +1,711 @@
+//! A module for formatting WDL v1 elements.
+
+use std::fmt::Write;
+
+use wdl_ast::v1::Decl;
+use wdl_ast::v1::DefaultOption;
+use wdl_ast::v1::DocumentItem;
+use wdl_ast::v1::Expr;
+use wdl_ast::v1::HintsItem;
+use wdl_ast::v1::HintsSection;
+use wdl_ast::v1::InputSection;
+use wdl_ast::v1::LiteralBoolean;
+use wdl_ast::v1::LiteralFloat;
+use wdl_ast::v1::LiteralInteger;
+use wdl_ast::v1::LiteralString;
+use wdl_ast::v1::OutputSection;
+use wdl_ast::v1::Placeholder;
+use wdl_ast::v1::PlaceholderOption;
+use wdl_ast::v1::SepOption;
+use wdl_ast::v1::StringPart;
+use wdl_ast::v1::StringText;
+use wdl_ast::v1::StructDefinition;
+use wdl_ast::v1::StructKeyword;
+use wdl_ast::v1::TrueFalseOption;
+use wdl_ast::v1::Type;
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::SyntaxElement;
+use wdl_ast::SyntaxKind;
+use wdl_grammar::SyntaxExt;
+
+use super::formatter::SPACE;
+use super::Formattable;
+use super::Formatter;
+use super::NEWLINE;
+use super::STRING_TERMINATOR;
+
+impl Formattable for DefaultOption {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // let default_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident);
+        // format_preceding_comments(&default_word, writer, formatter, true)?;
+        // write!(writer, "{}", default_word)?;
+        // format_inline_comment(&default_word, writer, formatter, true)?;
+
+        // let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment);
+        // format_preceding_comments(&assignment, writer, formatter, true)?;
+        // formatter.space_or_indent(writer)?;
+        // write!(writer, "{}", assignment)?;
+        // format_inline_comment(&assignment, writer, formatter, true)?;
+
+        // let value = self.value();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(value.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // value.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(value.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for SepOption {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // let sep_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident);
+        // format_preceding_comments(&sep_word, writer, formatter, true)?;
+        // write!(writer, "{}", sep_word)?;
+        // format_inline_comment(&sep_word, writer, formatter, true)?;
+
+        // let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment);
+        // format_preceding_comments(&assignment, writer, formatter, true)?;
+        // formatter.space_or_indent(writer)?;
+        // write!(writer, "{}", assignment)?;
+        // format_inline_comment(&assignment, writer, formatter, true)?;
+
+        // let separator = self.separator();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(separator.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // separator.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(separator.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for TrueFalseOption {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // let mut true_clause = String::new();
+        // let mut false_clause = String::new();
+        // let mut which_clause = None;
+        // for child in self.syntax().children_with_tokens() {
+        //     match child.kind() {
+        //         SyntaxKind::TrueKeyword => {
+        //             which_clause = Some(true);
+
+        //             format_preceding_comments(&child, &mut true_clause, formatter, true)?;
+        //             write!(true_clause, "{}", child)?;
+        //             format_inline_comment(&child, &mut true_clause, formatter, true)?;
+        //         }
+        //         SyntaxKind::FalseKeyword => {
+        //             which_clause = Some(false);
+
+        //             format_preceding_comments(&child, &mut false_clause, formatter, true)?;
+        //             write!(false_clause, "{}", child)?;
+        //             format_inline_comment(&child, &mut false_clause, formatter, true)?;
+        //         }
+        //         SyntaxKind::Assignment => {
+        //             let cur_clause = match which_clause {
+        //                 Some(true) => &mut true_clause,
+        //                 Some(false) => &mut false_clause,
+        //                 _ => unreachable!(
+        //                     "should have found a true or false keyword before an assignment"
+        //                 ),
+        //             };
+
+        //             format_preceding_comments(&child, cur_clause, formatter, true)?;
+        //             formatter.space_or_indent(cur_clause)?;
+        //             write!(cur_clause, "{}", child)?;
+        //             format_inline_comment(&child, cur_clause, formatter, true)?;
+        //         }
+        //         SyntaxKind::LiteralStringNode => {
+        //             let cur_clause = match which_clause {
+        //                 Some(true) => &mut true_clause,
+        //                 Some(false) => &mut false_clause,
+        //                 _ => unreachable!(
+        //                     "should have found a true or false keyword before a string"
+        //                 ),
+        //             };
+
+        //             format_preceding_comments(&child, cur_clause, formatter, true)?;
+        //             formatter.space_or_indent(cur_clause)?;
+        //             let literal_string = LiteralString::cast(
+        //                 child
+        //                     .as_node()
+        //                     .expect("LiteralStringNode should be a node")
+        //                     .clone(),
+        //             )
+        //             .expect("LiteralStringNode should cast to a LiteralString");
+        //             literal_string.format(cur_clause, formatter)?;
+        //             format_inline_comment(&child, writer, formatter, true)?;
+        //         }
+        //         SyntaxKind::Whitespace => {
+        //             // Ignore
+        //         }
+        //         SyntaxKind::Comment => {
+        //             // Handled by a call to `format_preceding_comments`
+        //             // or `format_inline_comment` in another match arm.
+        //         }
+        //         _ => {
+        //             unreachable!("Unexpected syntax kind: {:?}", child.kind());
+        //         }
+        //     }
+        // }
+        // write!(writer, "{} {}", true_clause, false_clause)?;
+
+        Ok(())
+    }
+}
+
+impl Formattable for PlaceholderOption {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        match self {
+            PlaceholderOption::Default(default) => default.format(writer, formatter),
+            PlaceholderOption::Sep(sep) => sep.format(writer, formatter),
+            PlaceholderOption::TrueFalse(true_false) => true_false.format(writer, formatter),
+        }
+    }
+}
+
+impl Formattable for Placeholder {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // Coerce all placeholders into `~{}` placeholders
+        // (as opposed to `${}` placeholders).
+        write!(writer, "~{{")?;
+
+        let mut option_present = false;
+        if let Some(option) = self.options().next() {
+            option.format(writer, formatter)?;
+            option_present = true;
+        }
+
+        let expr = self.expr();
+        if option_present {
+            formatter.space_or_indent(writer)?;
+        }
+        expr.format(writer, formatter)?;
+
+        write!(writer, "}}")
+    }
+}
+
+impl Formattable for StringText {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        let mut iter = self.as_str().chars().peekable();
+        let mut prev_c = None;
+        while let Some(c) = iter.next() {
+            match c {
+                '\\' => {
+                    if let Some(next_c) = iter.peek() {
+                        if *next_c == '\'' {
+                            // Single quotes need no escape in a double-quoted
+                            // string, so do not write this backslash.
+                            prev_c = Some(c);
+                            continue;
+                        }
+                    }
+                    writer.write_char(c)?;
+                }
+                '"' => {
+                    // Escape any double quote that is not already escaped,
+                    // including one at the very start of the text.
+                    if prev_c != Some('\\') {
+                        writer.write_char('\\')?;
+                    }
+                    writer.write_char(c)?;
+                }
+                _ => {
+                    writer.write_char(c)?;
+                }
+            }
+            prev_c = Some(c);
+        }
+
+        Ok(())
+    }
+}
+
+impl Formattable for LiteralString {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", STRING_TERMINATOR)?;
+        for part in self.parts() {
+            match part {
+                StringPart::Text(text) => {
+                    text.format(writer, formatter)?;
+                }
+                StringPart::Placeholder(placeholder) => {
+                    placeholder.format(writer, formatter)?;
+                }
+            }
+        }
+        write!(writer, "{}", STRING_TERMINATOR)
+    }
+}
+
+impl Formattable for LiteralBoolean {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.value()) // TODO
+    }
+}
+
+impl Formattable for LiteralFloat {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for LiteralInteger {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Type {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Expr {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _state: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.syntax()) // TODO
+    }
+}
+
+impl Formattable for Decl {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let ty = self.ty();
+        // formatter.indent(writer)?;
+        // ty.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(ty.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let name = self.name();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // if let Some(expr) = self.expr() {
+        //     let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment);
+        //     format_preceding_comments(&assignment, writer, formatter, true)?;
+        //     formatter.space_or_indent(writer)?;
+        //     write!(writer, "{}", assignment)?;
+        //     format_inline_comment(&assignment, writer, formatter, true)?;
+
+        //     format_preceding_comments(
+        //         &SyntaxElement::from(expr.syntax().clone()),
+        //         writer,
+        //         formatter,
+        //         true,
+        //     )?;
+        //     formatter.space_or_indent(writer)?;
+        //     expr.format(writer, formatter)?;
+        // }
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for InputSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let input_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InputKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", input_keyword)?;
+        // format_inline_comment(&input_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for decl in self.declarations() {
+        //     decl.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for OutputSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let output_keyword = first_child_of_kind(self.syntax(), SyntaxKind::OutputKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", output_keyword)?;
+        // format_inline_comment(&output_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for decl in self.declarations() {
+        //     Decl::Bound(decl).format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for HintsItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let name = self.name();
+        // formatter.indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon);
+        // format_preceding_comments(&colon, writer, formatter, true)?;
+        // if formatter.interrupted() {
+        //     formatter.indent(writer)?;
+        // }
+        // write!(writer, "{}", colon)?;
+        // format_inline_comment(&colon, writer, formatter, true)?;
+
+        // let expr = self.expr();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(expr.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // expr.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for HintsSection {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )?;
+
+        // let hints_keyword = first_child_of_kind(self.syntax(), SyntaxKind::HintsKeyword);
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", hints_keyword)?;
+        // format_inline_comment(&hints_keyword, writer, formatter, true)?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // format_preceding_comments(&open_brace, writer, formatter, true)?;
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // for item in self.items() {
+        //     item.format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for StructKeyword {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        _formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        write!(writer, "{}", self.as_str())
+    }
+}
+
+impl Formattable for StructDefinition {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), false, true)?;
+
+        let struct_keyword = self.keyword();
+        struct_keyword.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, struct_keyword.syntax().inline_comment(), true)?;
+
+        let name = self.name();
+        // Emit any comments that precede the name itself.
+        formatter.format_preceding_trivia(writer, name.syntax().preceding_trivia(), true, false)?;
+        formatter.space_or_indent(writer)?;
+        name.format(writer, formatter)?;
+        formatter.format_inline_comment(writer, name.syntax().inline_comment(), true)?;
+        // formatter.space_or_indent(writer)?;
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
+        // // Open braces should ignore the "+1 rule" followed by other interrupted
+        // // elements.
+        // if formatter.interrupted() {
+        //     formatter.reset_interrupted();
+        //     formatter.indent(writer)?;
+        // } else {
+        //     write!(writer, "{}", SPACE)?;
+        // }
+        // write!(writer, "{}", open_brace)?;
+        // format_inline_comment(&open_brace, writer, formatter, false)?;
+
+        // formatter.increment_indent();
+
+        // if let Some(m) = self.metadata().next() {
+        //     m.format(writer, formatter)?;
+        //     write!(writer, "{}", NEWLINE)?;
+        // }
+
+        // if let Some(pm) = self.parameter_metadata().next() {
+        //     pm.format(writer, formatter)?;
+        //     write!(writer, "{}", NEWLINE)?;
+        // }
+
+        // for decl in self.members() {
+        //     Decl::Unbound(decl).format(writer, formatter)?;
+        // }
+
+        // formatter.decrement_indent();
+
+        // let close_brace = self
+        //     .syntax()
+        //     .children_with_tokens()
+        //     .find(|element| element.kind() == SyntaxKind::CloseBrace)
+        //     .expect("StructDefinition should have a close brace");
+        // format_preceding_comments(&close_brace, writer, formatter, false)?;
+        // formatter.indent(writer)?;
+        // write!(writer, "{}", close_brace)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     false,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for DocumentItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        match self {
+            DocumentItem::Import(_) => {
+                unreachable!("Import statements should not be formatted as a DocumentItem")
+            }
+            DocumentItem::Workflow(workflow) => workflow.format(writer, formatter),
+            DocumentItem::Task(task) => task.format(writer, formatter),
+            DocumentItem::Struct(structure) => structure.format(writer, formatter),
+        }
+    }
+}
diff --git a/backup/wdl-format-old/src/workflow.rs b/backup/wdl-format-old/src/workflow.rs
new file mode 100644
index 000000000..f82e57003
--- /dev/null
+++ b/backup/wdl-format-old/src/workflow.rs
@@ -0,0 +1,666 @@
+//! A module for formatting elements in workflows.
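+//!
+//! Most of the (currently commented-out) implementations below share one
+//! "interrupted" indentation pattern: if a comment has interrupted the
+//! current line, the next token starts on a fresh indented line; otherwise a
+//! single space separates it from the previous token. A minimal sketch of
+//! that pattern, assuming a `Formatter` with the `interrupted`, 
+//! `reset_interrupted`, and `indent` methods used throughout this file (this
+//! mirrors what `Formatter::space_or_indent` appears to do, but the body here
+//! is illustrative only):
+//!
+//! ```text
+//! fn space_or_indent<T: std::fmt::Write>(
+//!     formatter: &mut Formatter,
+//!     writer: &mut T,
+//! ) -> std::fmt::Result {
+//!     if formatter.interrupted() {
+//!         formatter.reset_interrupted();
+//!         formatter.indent(writer) // newline plus the current indentation
+//!     } else {
+//!         writer.write_char(' ') // stay on the same line
+//!     }
+//! }
+//! ```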
+
+use wdl_ast::v1::CallAfter;
+use wdl_ast::v1::CallAlias;
+use wdl_ast::v1::CallInputItem;
+use wdl_ast::v1::CallStatement;
+use wdl_ast::v1::ConditionalStatement;
+use wdl_ast::v1::Decl;
+use wdl_ast::v1::ScatterStatement;
+use wdl_ast::v1::WorkflowDefinition;
+use wdl_ast::v1::WorkflowItem;
+use wdl_ast::v1::WorkflowStatement;
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::SyntaxElement;
+use wdl_ast::SyntaxKind;
+
+use super::formatter::SPACE;
+use super::Formattable;
+use super::Formatter;
+use super::NEWLINE;
+
+impl Formattable for CallAlias {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let as_keyword = first_child_of_kind(self.syntax(), SyntaxKind::AsKeyword);
+        // formatter.space_or_indent(writer)?;
+        // write!(writer, "{}", as_keyword)?;
+        // format_inline_comment(&as_keyword, writer, formatter, true)?;
+
+        // let ident = self.name();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(ident.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // ident.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for CallAfter {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // let after_keyword = first_child_of_kind(self.syntax(), SyntaxKind::AfterKeyword);
+        // formatter.space_or_indent(writer)?;
+        // write!(writer, "{}", after_keyword)?;
+        // format_inline_comment(&after_keyword, writer, formatter, true)?;
+
+        // let ident = self.name();
+        // format_preceding_comments(
+        //     &SyntaxElement::from(ident.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+        // formatter.space_or_indent(writer)?;
+        // ident.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for CallInputItem {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // let name = self.name();
+        // name.format(writer, formatter)?;
+        // format_inline_comment(
+        //     &SyntaxElement::from(name.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )?;
+
+        // if let Some(expr) = self.expr() {
+        //     let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment);
+        //     format_preceding_comments(&assignment, writer, formatter, true)?;
+        //     formatter.space_or_indent(writer)?;
+        //     write!(writer, "{}", assignment)?;
+        //     format_inline_comment(&assignment, writer, formatter, true)?;
+
+        //     format_preceding_comments(
+        //         &SyntaxElement::from(expr.syntax().clone()),
+        //         writer,
+        //         formatter,
+        //         true,
+        //     )?;
+        //     formatter.space_or_indent(writer)?;
+        //     expr.format(writer, formatter)?;
+        // }
+
+        // format_inline_comment(
+        //     &SyntaxElement::from(self.syntax().clone()),
+        //     writer,
+        //     formatter,
+        //     true,
+        // )
+        Ok(())
+    }
+}
+
+impl Formattable for CallStatement {
+    fn format<T: std::fmt::Write>(
+        &self,
+        writer: &mut T,
+        formatter: &mut Formatter,
+    ) -> std::fmt::Result {
+        // format_preceding_comments(
+        //     &SyntaxElement::from(self.syntax().clone()),
+ // writer, + // formatter, + // false, + // )?; + + // let call_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::CallKeyword); formatter.indent(writer)?; + // write!(writer, "{}", call_keyword)?; + // format_inline_comment(&call_keyword, writer, formatter, true)?; + + // let target = self.target(); + // format_preceding_comments( + // &SyntaxElement::Node(target.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", target.syntax())?; + // format_inline_comment( + // &SyntaxElement::Node(target.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // if let Some(alias) = self.alias() { + // alias.format(writer, formatter)?; + // } + + // for after in self.after() { + // after.format(writer, formatter)?; + // } + + // let inputs: Vec<_> = self.inputs().collect(); + // if !inputs.is_empty() { + // let open_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::OpenBrace); format_preceding_comments(&open_brace, + // writer, formatter, true)?; // Open braces should ignore the "+1 + // rule" followed by other interrupted // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, true)?; + + // // TODO consider detecting if document is >= v1.2 and forcing the + // optional input // syntax + // if let Some(input_keyword) = self + // .syntax() + // .children_with_tokens() + // .find(|c| c.kind() == SyntaxKind::InputKeyword) + // { + // format_preceding_comments(&input_keyword, writer, formatter, true)?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", input_keyword)?; + // format_inline_comment(&input_keyword, writer, formatter, true)?; + + // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); + // format_preceding_comments(&colon, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", colon)?; + // format_inline_comment(&colon, writer, formatter, true)?; + // } // else v1.2 syntax + + // if inputs.len() == 1 { + // let input = inputs.first().expect("inputs should have a first + // element"); format_preceding_comments( + // &SyntaxElement::from(input.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // input.format(writer, formatter)?; + // // TODO there may be a trailing comma with comments attached to it + + // let close_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::CloseBrace); format_preceding_comments(& + // close_brace, writer, formatter, true)?; formatter. 
+ // space_or_indent(writer)?; write!(writer, "{}", close_brace)?; + // } else { + // // multiple inputs + // let mut commas = self + // .syntax() + // .children_with_tokens() + // .filter(|c| c.kind() == SyntaxKind::Comma); + + // formatter.increment_indent(); + + // for input in inputs { + // if !formatter.interrupted() { + // write!(writer, "{}", NEWLINE)?; + // } else { + // formatter.reset_interrupted(); + // } + // format_preceding_comments( + // &SyntaxElement::from(input.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + // formatter.indent(writer)?; + // input.format(writer, formatter)?; + // if let Some(cur_comma) = commas.next() { + // format_preceding_comments(&cur_comma, writer, formatter, + // true)?; write!(writer, ",")?; + // format_inline_comment(&cur_comma, writer, formatter, true)?; + // } else { + // write!(writer, ",")?; + // } + // } + // if !formatter.interrupted() { + // write!(writer, "{}", NEWLINE)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), + // SyntaxKind::CloseBrace); format_preceding_comments(& + // close_brace, writer, formatter, false)?; formatter. + // indent(writer)?; write!(writer, "{}", close_brace)?; + // } + // } + + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for ConditionalStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let if_keyword = first_child_of_kind(self.syntax(), SyntaxKind::IfKeyword); + // formatter.indent(writer)?; + // write!(writer, "{}", if_keyword)?; + // format_inline_comment(&if_keyword, writer, formatter, true)?; + + // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); + // format_preceding_comments(&open_paren, writer, formatter, true)?; + // // Open parens should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_paren)?; + + // let mut paren_on_same_line = true; + // let expr = self.expr(); + // // PERF: This calls `to_string()` which is also called later by `format()` + // // There should be a way to avoid this. 
+ // let multiline_expr = expr.syntax().to_string().contains(NEWLINE); + + // format_inline_comment(&open_paren, writer, formatter, !multiline_expr)?; + // if multiline_expr { + // formatter.increment_indent(); + // paren_on_same_line = false; + // } + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // !multiline_expr, + // )?; + // if formatter.interrupted() || multiline_expr { + // formatter.indent(writer)?; + // paren_on_same_line = false; + // } + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // !multiline_expr, + // )?; + // if formatter.interrupted() { + // paren_on_same_line = false; + // } + + // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); + // format_preceding_comments(&close_paren, writer, formatter, !multiline_expr)?; + // if formatter.interrupted() || !paren_on_same_line { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", close_paren)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. + // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for stmt in self.statements() { + // stmt.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for ScatterStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let scatter_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::ScatterKeyword); formatter.indent(writer)?; + // write!(writer, "{}", scatter_keyword)?; + // format_inline_comment(&scatter_keyword, writer, formatter, true)?; + + // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); + // format_preceding_comments(&open_paren, writer, formatter, true)?; + // // Open parens should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_paren)?; + // format_inline_comment(&open_paren, writer, formatter, true)?; + + // let ident = self.variable(); + // format_preceding_comments( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // ident.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(ident.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let in_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InKeyword); + // format_preceding_comments(&in_keyword, writer, formatter, true)?; + // formatter.space_or_indent(writer)?; + // write!(writer, "{}", in_keyword)?; + // format_inline_comment(&in_keyword, writer, formatter, true)?; + + // let expr = self.expr(); + // format_preceding_comments( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // expr.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(expr.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); + // format_preceding_comments(&close_paren, writer, formatter, true)?; + // if formatter.interrupted() { + // formatter.indent(writer)?; + // } + // write!(writer, "{}", close_paren)?; + // format_inline_comment(&close_paren, writer, formatter, true)?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // for stmt in self.statements() { + // stmt.format(writer, formatter)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} + +impl Formattable for WorkflowStatement { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + match self { + WorkflowStatement::Call(c) => c.format(writer, formatter), + WorkflowStatement::Conditional(c) => c.format(writer, formatter), + WorkflowStatement::Scatter(s) => s.format(writer, formatter), + WorkflowStatement::Declaration(d) => Decl::Bound(d.clone()).format(writer, formatter), + } + } +} + +impl Formattable for WorkflowDefinition { + fn format( + &self, + writer: &mut T, + formatter: &mut Formatter, + ) -> std::fmt::Result { + // format_preceding_comments( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // )?; + + // let workflow_keyword = first_child_of_kind(self.syntax(), + // SyntaxKind::WorkflowKeyword); write!(writer, "{}", + // workflow_keyword)?; format_inline_comment(&workflow_keyword, writer, + // formatter, true)?; + + // let name = self.name(); + // format_preceding_comments( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + // formatter.space_or_indent(writer)?; + // name.format(writer, formatter)?; + // format_inline_comment( + // &SyntaxElement::from(name.syntax().clone()), + // writer, + // formatter, + // true, + // )?; + + // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); + // format_preceding_comments(&open_brace, writer, formatter, true)?; + // // Open braces should ignore the "+1 rule" followed by other interrupted + // // elements. 
+ // if formatter.interrupted() { + // formatter.reset_interrupted(); + // formatter.indent(writer)?; + // } else { + // write!(writer, "{}", SPACE)?; + // } + // write!(writer, "{}", open_brace)?; + // format_inline_comment(&open_brace, writer, formatter, false)?; + + // formatter.increment_indent(); + + // let mut meta_section_str = String::new(); + // let mut parameter_meta_section_str = String::new(); + // let mut input_section_str = String::new(); + // let mut body_str = String::new(); + // let mut output_section_str = String::new(); + // let mut hints_section_str = String::new(); + + // for item in self.items() { + // match item { + // WorkflowItem::Metadata(m) => { + // m.format(&mut meta_section_str, formatter)?; + // } + // WorkflowItem::ParameterMetadata(pm) => { + // pm.format(&mut parameter_meta_section_str, formatter)?; + // } + // WorkflowItem::Input(i) => { + // i.format(&mut input_section_str, formatter)?; + // } + // WorkflowItem::Call(c) => { + // c.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Conditional(c) => { + // c.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Scatter(s) => { + // s.format(&mut body_str, formatter)?; + // } + // WorkflowItem::Declaration(d) => { + // Decl::Bound(d).format(&mut body_str, formatter)?; + // } + // WorkflowItem::Output(o) => { + // o.format(&mut output_section_str, formatter)?; + // } + // WorkflowItem::Hints(h) => { + // h.format(&mut hints_section_str, formatter)?; + // } + // } + // } + + // let mut first_section = true; + // if !meta_section_str.is_empty() { + // first_section = false; + // write!(writer, "{}", meta_section_str)?; + // } + // if !parameter_meta_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", parameter_meta_section_str)?; + // } + // if !input_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", input_section_str)?; + // } + // if !body_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", body_str)?; + // } + // if !output_section_str.is_empty() { + // if first_section { + // first_section = false; + // } else { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", output_section_str)?; + // } + // if !hints_section_str.is_empty() { + // if !first_section { + // write!(writer, "{}", NEWLINE)?; + // } + // write!(writer, "{}", hints_section_str)?; + // } + + // formatter.decrement_indent(); + + // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); + // format_preceding_comments(&close_brace, writer, formatter, false)?; + // formatter.indent(writer)?; + // write!(writer, "{}", close_brace)?; + // format_inline_comment( + // &SyntaxElement::from(self.syntax().clone()), + // writer, + // formatter, + // false, + // ) + Ok(()) + } +} diff --git a/backup/wdl-format-old/tests/format.rs b/backup/wdl-format-old/tests/format.rs new file mode 100644 index 000000000..d7d6dc5d8 --- /dev/null +++ b/backup/wdl-format-old/tests/format.rs @@ -0,0 +1,192 @@ +//! The format file tests. +//! +//! This test looks for directories in `tests/format`. +//! +//! Each directory is expected to contain: +//! +//! * `source.wdl` - the test input source to parse. +//! * `source.formatted` - the expected formatted output. +//! +//! 
The `source.formatted` file may be automatically generated or updated by
+//! setting the `BLESS` environment variable when running this test.
+
+use std::collections::HashSet;
+use std::env;
+use std::ffi::OsStr;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::exit;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term;
+use codespan_reporting::term::termcolor::Buffer;
+use codespan_reporting::term::Config;
+use colored::Colorize;
+use pretty_assertions::StrComparison;
+use rayon::prelude::*;
+use wdl_ast::Diagnostic;
+use wdl_format::format_document;
+
+fn find_tests() -> Vec<PathBuf> {
+    // Check for filter arguments consisting of test names
+    let mut filter = HashSet::new();
+    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
+        if !arg.starts_with('-') {
+            filter.insert(arg);
+        }
+    }
+
+    let mut tests: Vec<PathBuf> = Vec::new();
+    for entry in Path::new("tests/format").read_dir().unwrap() {
+        let entry = entry.expect("failed to read directory");
+        let path = entry.path();
+        if !path.is_dir()
+            || (!filter.is_empty()
+                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
+        {
+            continue;
+        }
+
+        tests.push(path);
+    }
+
+    tests.sort();
+    tests
+}
+
+fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
+    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
+    let mut buffer = Buffer::no_color();
+    for diagnostic in diagnostics {
+        term::emit(
+            &mut buffer,
+            &Config::default(),
+            &file,
+            &diagnostic.to_codespan(),
+        )
+        .expect("should emit");
+    }
+
+    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
+}
+
+fn compare_result(path: &Path, result: &str) -> Result<(), String> {
+    if env::var_os("BLESS").is_some() {
+        fs::write(path, &result).map_err(|e| {
+            format!(
+                "failed to write result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?;
+        return Ok(());
+    }
+
+    let expected = fs::read_to_string(path)
+        .map_err(|e| {
+            format!(
+                "failed to read result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?
+        .replace("\r\n", "\n");
+
+    if expected != result {
+        return Err(format!(
+            "result is not as expected:\n{}",
+            StrComparison::new(&expected, &result),
+        ));
+    }
+
+    Ok(())
+}
+
+fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
+    let path = test.join("source.wdl");
+    let source = std::fs::read_to_string(&path).map_err(|e| {
+        format!(
+            "failed to read source file `{path}`: {e}",
+            path = path.display()
+        )
+    })?;
+
+    let formatted = format_document(&source).map_err(|e| {
+        format!(
+            "failed to format `{path}`: {e}",
+            path = path.display(),
+            e = format_diagnostics(&e, path.as_path(), &source)
+        )
+    })?;
+    compare_result(path.with_extension("formatted.wdl").as_path(), &formatted)?;
+
+    ntests.fetch_add(1, Ordering::SeqCst);
+    Ok(())
+}
+
+fn main() {
+    let tests = find_tests();
+    println!("\nrunning {} tests\n", tests.len());
+
+    let ntests = AtomicUsize::new(0);
+    let errors = tests
+        .par_iter()
+        .filter_map(|test| {
+            let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
+            match std::panic::catch_unwind(|| {
+                match run_test(test, &ntests)
+                    .map_err(|e| format!("failed to run test `{path}`: {e}", path = test.display()))
+                    .err()
+                {
+                    Some(e) => {
+                        println!("test {test_name} ... {failed}", failed = "failed".red());
+                        Some((test_name, e))
+                    }
+                    None => {
+                        println!("test {test_name} ... {ok}", ok = "ok".green());
+                        None
+                    }
+                }
+            }) {
+                Ok(result) => result,
+                Err(e) => {
+                    println!(
+                        "test {test_name} ... {panicked}",
+                        panicked = "panicked".red()
+                    );
+                    Some((
+                        test_name,
+                        format!(
+                            "test panicked: {e:?}",
+                            e = e
+                                .downcast_ref::<String>()
+                                .map(|s| s.as_str())
+                                .or_else(|| e.downcast_ref::<&str>().copied())
+                                .unwrap_or("no panic message")
+                        ),
+                    ))
+                }
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !errors.is_empty() {
+        eprintln!(
+            "\n{count} test(s) {failed}:",
+            count = errors.len(),
+            failed = "failed".red()
+        );
+
+        for (name, msg) in errors.iter() {
+            eprintln!("{name}: {msg}", msg = msg.red());
+        }
+
+        exit(1);
+    }
+
+    println!(
+        "\ntest result: ok. {} passed\n",
+        ntests.load(Ordering::SeqCst)
+    );
+}
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
new file mode 100644
index 000000000..d9a98e06c
--- /dev/null
+++ b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
@@ -0,0 +1,25 @@
+'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl
+on the date 08-05-2024.
+It was accompanied by the following license:
+
+MIT License
+
+Copyright (c) 2017 ENCODE DCC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
new file mode 100644
index 000000000..7c8de0324
--- /dev/null
+++ b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
@@ -0,0 +1 @@
+version 1.0
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
new file mode 100644
index 000000000..92c09ea84
--- /dev/null
+++ b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
@@ -0,0 +1,3296 @@
+version 1.0
+
+struct RuntimeEnvironment {
+    String docker
+    String singularity
+    String conda
+}
+
+workflow chip {
+    String pipeline_ver = 'v2.2.2'
+
+    meta {
+        version: 'v2.2.2'
+
+        author: 'Jin wook Lee'
+        email: 'leepc12@gmail.com'
+        description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil.'
+ organization: 'ENCODE DCC' + + specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing' + + default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2' + default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json' + + parameter_group: { + runtime_environment: { + title: 'Runtime environment', + description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.' + }, + pipeline_metadata: { + title: 'Pipeline metadata', + description: 'Metadata for a pipeline (e.g. title and description).' + }, + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.', + help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.' + }, + input_genomic_data: { + title: 'Input genomic data', + description: 'Genomic input files for experiment.', + help: 'Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].' + }, + input_genomic_data_control: { + title: 'Input genomic data (control)', + description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.', + help: 'Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.' + }, + pipeline_parameter: { + title: 'Pipeline parameter', + description: 'Pipeline type and flags to turn on/off analyses.', + help: 'Use chip.align_only to align FASTQs without peak calling.' 
+ }, + alignment: { + title: 'Alignment', + description: 'Parameters for alignment.', + help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.' + }, + peak_calling: { + title: 'Peak calling', + description: 'Parameters for peak calling.', + help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.' + }, + resource_parameter: { + title: 'Resource parameter', + description: 'Number of CPUs (threads), max. memory and walltime for tasks.', + help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.' + } + } + } + input { + # group: runtime_environment + String docker = 'encodedcc/chip-seq-pipeline:v2.2.2' + String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + String conda = 'encd-chip' + String conda_macs2 = 'encd-chip-macs2' + String conda_spp = 'encd-chip-spp' + + # group: pipeline_metadata + String title = 'Untitled' + String description = 'No description' + + # group: reference_genome + File? genome_tsv + String? genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + + Boolean? 
ctl_paired_end + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = 'bowtie2' + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = 'auto' + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = 'picard' + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? 
+        String? gc_bias_picard_java_heap
+    }
+
+    parameter_meta {
+        docker: {
+            description: 'Default Docker image URI to run WDL tasks.',
+            group: 'runtime_environment',
+            example: 'ubuntu:20.04'
+        }
+        singularity: {
+            description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.',
+            group: 'runtime_environment',
+            example: 'docker://ubuntu:20.04'
+        }
+        conda: {
+            description: 'Default Conda environment name to run WDL tasks. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip'
+        }
+        conda_macs2: {
+            description: 'Conda environment name for task macs2. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip-macs2'
+        }
+        conda_spp: {
+            description: 'Conda environment name for tasks spp/xcor. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip-spp'
+        }
+        title: {
+            description: 'Experiment title.',
+            group: 'pipeline_metadata',
+            example: 'ENCSR936XTK (subsampled 1/50)'
+        }
+        description: {
+            description: 'Experiment description.',
+            group: 'pipeline_metadata',
+            example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)'
+        }
+        genome_tsv: {
+            description: 'Reference genome database TSV.',
+            group: 'reference_genome',
+            help: 'This TSV file includes all genome-specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.',
+            example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv'
+        }
+        genome_name: {
+            description: 'Genome name.',
+            group: 'reference_genome'
+        }
+        ref_fa: {
+            description: 'Reference FASTA file.',
+            group: 'reference_genome'
+        }
+        bowtie2_idx_tar: {
+            description: 'Bowtie2 index TAR file.',
+            group: 'reference_genome'
+        }
+        custom_aligner_idx_tar: {
+            description: 'Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.',
+            group: 'reference_genome'
+        }
+        chrsz: {
+            description: '2-col chromosome sizes file.',
+            group: 'reference_genome'
+        }
+        blacklist: {
+            description: 'Blacklist file in BED format.',
+            group: 'reference_genome',
+            help: 'Peaks will be filtered with this file.'
+        }
+        blacklist2: {
+            description: 'Secondary blacklist file in BED format.',
+            group: 'reference_genome',
+            help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.'
+        }
+        mito_chr_name: {
+            description: 'Mitochondrial chromosome name.',
+            group: 'reference_genome',
+            help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.'
+        }
+        regex_bfilt_peak_chr_name: {
+            description: 'Reg-ex for chromosomes to keep while filtering peaks.',
+            group: 'reference_genome',
+            help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.'
+        }
+        gensz: {
+            description: 'Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd column in chromosome sizes file.',
+            group: 'reference_genome'
+        }
+        paired_end: {
+            description: 'Sequencing endedness.',
+            group: 'input_genomic_data',
+            help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.',
+            example: true
+        }
+        paired_ends: {
+            description: 'Sequencing endedness array (for mixed SE/PE datasets).',
+            group: 'input_genomic_data',
+            help: 'Whether each biological replicate is paired ended or not.'
+        }
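+        # Illustrative note (hypothetical values, not part of the original
+        # pipeline): for a mixed run where rep1 is single-ended and rep2 is
+        # paired-ended, the input JSON would carry something like:
+        #   "chip.paired_ends": [false, true]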
+        fastqs_rep1_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type, fill in parameters for that type and leave the others undefined. Especially for FASTQs, there is an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep1_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz'
+            ]
+        }
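+        # A sketch with hypothetical file names: technical replicates of
+        # biological replicate 1 are merged by listing them together, e.g.
+        #   "chip.fastqs_rep1_R1": ["rep1_lane1.R1.fastq.gz", "rep1_lane2.R1.fastq.gz"]
+        #   "chip.fastqs_rep1_R2": ["rep1_lane1.R2.fastq.gz", "rep1_lane2.R2.fastq.gz"]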
+        fastqs_rep2_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep2_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep3_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 3.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep3_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 3.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep4_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 4.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep4_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 4.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep5_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 5.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep5_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 5.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep6_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 6.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep6_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 6.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep7_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 7.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep7_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 7.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep8_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 8.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep8_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 8.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep9_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 9.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep9_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 9.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep10_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 10.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep10_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 10.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        bams: {
+            description: 'List of unfiltered/raw BAM files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].'
+        }
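+        # Sketch of starting from BAMs instead of FASTQs (hypothetical paths):
+        #   "chip.bams": ["rep1.bam", "rep2.bam"]
+        #   "chip.paired_end": true
+        # Only one input type should be filled in; the FASTQ arrays would be
+        # left undefined in this case.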
+        nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
+        }
+        tas: {
+            description: 'List of TAG-ALIGN files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
+        }
+        peaks: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.'
+        }
+        peaks_pr1: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peaks_pr2: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peak_pooled: {
+            description: 'NARROWPEAK file for pooled true replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
+        }
+        peak_ppr1: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
+        }
+        peak_ppr2: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
+        }
+
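+        # Putting the peak-input rules above together (hypothetical paths,
+        # two biological replicates, chip.true_rep_only == false):
+        #   "chip.peaks":       ["rep1.narrowPeak.gz", "rep2.narrowPeak.gz"]
+        #   "chip.peaks_pr1":   ["rep1.pr1.narrowPeak.gz", "rep2.pr1.narrowPeak.gz"]
+        #   "chip.peaks_pr2":   ["rep1.pr2.narrowPeak.gz", "rep2.pr2.narrowPeak.gz"]
+        #   "chip.peak_pooled": "pooled.narrowPeak.gz"
+        #   "chip.peak_ppr1":   "pooled.pr1.narrowPeak.gz"
+        #   "chip.peak_ppr2":   "pooled.pr2.narrowPeak.gz"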
+        ctl_paired_end: {
+            description: 'Sequencing endedness for all controls.',
+            group: 'input_genomic_data_control',
+            help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.'
+        }
+        ctl_paired_ends: {
+            description: 'Sequencing endedness array for mixed SE/PE controls.',
+            group: 'input_genomic_data_control',
+            help: 'Whether each control replicate is paired ended or not.'
+        }
+        ctl_fastqs_rep1_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type, fill in parameters for that type and leave the others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep1_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep3_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep3_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep5_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 5.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep5_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 5.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep6_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 6.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep6_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 6.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep7_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 7.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep7_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 7.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep8_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 8.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep8_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 8.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep9_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 9.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep9_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 9.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep10_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 10.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep10_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 10.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_bams: {
+            description: 'List of unfiltered/raw BAM files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].'
+        }
+        ctl_nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].'
+        }
+        ctl_tas: {
+            description: 'List of TAG-ALIGN files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].'
+        }
+
+        pipeline_type: {
+            description: 'Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.',
+            group: 'pipeline_parameter',
+            help: 'Default peak caller is different for each type. spp for TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.',
+            choices: ['tf', 'histone', 'control'],
+            example: 'tf'
+        }
+        redact_nodup_bam: {
+            description: 'Redact filtered/nodup BAM.',
+            group: 'pipeline_parameter',
+            help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.'
+        }
+        align_only: {
+            description: 'Align only mode.',
+            group: 'pipeline_parameter',
+            help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.'
+        }
+        true_rep_only: {
+            description: 'Disables all analyses related to pseudo-replicates.',
+            group: 'pipeline_parameter',
+            help: 'Pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
+        }
+        enable_count_signal_track: {
+            description: 'Enables generation of count signal tracks.',
+            group: 'pipeline_parameter'
+        }
+        enable_jsd: {
+            description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
+            group: 'pipeline_parameter'
+        }
+        enable_gc_bias: {
+            description: 'Enables GC bias calculation.',
+            group: 'pipeline_parameter'
+        }
+
+        aligner: {
+            description: 'Aligner. bowtie2, bwa or custom',
+            group: 'alignment',
+            help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            choices: ['bowtie2', 'bwa', 'custom'],
+            example: 'bowtie2'
+        }
+        custom_align_py: {
+            description: 'Python script for a custom aligner.',
+            group: 'alignment',
+            help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
+        }
+        use_bwa_mem_for_pe: {
+            description: 'For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
+            group: 'alignment',
+            help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
+        }
+        bwa_mem_read_len_limit: {
+            description: 'Read length limit for bwa mem (for PE FASTQs only).',
+            group: 'alignment',
+            help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
+        }
+        use_bowtie2_local_mode: {
+            description: 'Use bowtie2\'s local mode (soft-clipping).',
+            group: 'alignment',
+            help: 'This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.'
+        }
+        crop_length: {
+            description: 'Crop FASTQs\' reads longer than this length.',
+            group: 'alignment',
+            help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.'
+        }
+        crop_length_tol: {
+            description: 'Tolerance for cropping reads in FASTQs.',
+            group: 'alignment',
+            help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.'
+        }
+        trimmomatic_phred_score_format: {
+            description: 'Base encoding (format) for Phred score in FASTQs.',
+            group: 'alignment',
+            choices: ['auto', 'phred33', 'phred64'],
+            help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".'
+        }
+        xcor_trim_bp: {
+            description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'This does not affect alignment of experimental/control replicates. Pipeline additionally aligns R1 FASTQ for cross-correlation analysis only. This parameter is used for it.'
+        }
+        use_filt_pe_ta_for_xcor: {
+            description: 'Use filtered PE BAM for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.'
+        }
+        dup_marker: {
+            description: 'Marker for duplicate reads. picard or sambamba.',
+            group: 'alignment',
+            help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.',
+            choices: ['picard', 'sambamba'],
+            example: 'picard'
+        }
+        no_dup_removal: {
+            description: 'Disable removal of duplicate reads during filtering BAM.',
+            group: 'alignment',
+            help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.'
+        }
+        mapq_thresh: {
+            description: 'Threshold for low MAPQ reads removal.',
+            group: 'alignment',
+            help: 'Low MAPQ reads are filtered out while filtering BAM.'
+        }
+        filter_chrs: {
+            description: 'List of chromosomes to be filtered out while filtering BAM.',
+            group: 'alignment',
+            help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact word for chromosome names.'
+        }
+        subsample_reads: {
+            description: 'Subsample reads. Shuffle and subsample reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering experiment BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
+        }
+        ctl_subsample_reads: {
+            description: 'Subsample control reads. Shuffle and subsample control reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering control BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
+        }
+        xcor_subsample_reads: {
+            description: 'Subsample reads for cross-correlation analysis only.',
+            group: 'alignment',
+            help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
+        }
+        xcor_exclusion_range_min: {
+            description: 'Exclusion minimum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.'
+        }
+        xcor_exclusion_range_max: {
+            description: 'Exclusion maximum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. If not defined, default values of `max(read length + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.'
+        }
+        pseudoreplication_random_seed: {
+            description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).',
+            group: 'alignment',
+            help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
+        }
+        ctl_depth_limit: {
+            description: 'Hard limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.'
+        }
+        exp_ctl_depth_ratio_limit: {
+            description: 'Second limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate\'s read depth multiplied by this factor, then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.'
+        }
+        fraglen: {
+            description: 'Fragment length for each biological replicate.',
+            group: 'peak_calling',
+            help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.'
+        }
+        peak_caller: {
+            description: 'Peak caller.',
+            group: 'peak_calling',
+            help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-Seq even though spp is the default for TF ChIP-Seq (chip.pipeline_type == tf).',
+            example: 'spp'
+        }
+        always_use_pooled_ctl: {
+            description: 'Always choose a pooled control for each experiment replicate.',
+            group: 'peak_calling',
+            help: 'If turned on, ignores chip.ctl_depth_ratio.'
+        }
+        ctl_depth_ratio: {
+            description: 'Maximum depth ratio between control replicates.',
+            group: 'peak_calling',
+            help: 'If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.'
+        }
+
+        cap_num_peak: {
+            description: 'Upper limit on the number of peaks.',
+            group: 'peak_calling',
+            help: 'It is 300000 and 500000 by default for spp and macs2, respectively.'
+        }
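+        # For instance (illustrative values): to run a TF pipeline with macs2
+        # instead of the default spp, the input JSON could carry
+        #   "chip.pipeline_type": "tf"
+        #   "chip.peak_caller": "macs2"
+        # and the peak-caller-derived defaults (peak type, cap_num_peak)
+        # switch to their macs2 values accordingly.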
+        pval_thresh: {
+            description: 'p-value threshold for MACS2 peak caller.',
+            group: 'peak_calling',
+            help: 'macs2 callpeak -p'
+        }
+        fdr_thresh: {
+            description: 'FDR threshold for spp peak caller (phantompeakqualtools).',
+            group: 'peak_calling',
+            help: 'run_spp.R -fdr='
+        }
+        idr_thresh: {
+            description: 'IDR threshold.',
+            group: 'peak_calling'
+        }
+
+        align_cpu: {
+            description: 'Number of cores for task align.',
+            group: 'resource_parameter',
+            help: 'Task align merges/crops/maps FASTQs.'
+        }
+        align_bowtie2_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        align_bwa_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task align with bwa as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        align_time_hr: {
+            description: 'Walltime (h) required for task align.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        align_bowtie2_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of instance on GCP/AWS.'
+        }
+        align_bwa_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of instance on GCP/AWS.'
+        }
+        filter_cpu: {
+            description: 'Number of cores for task filter.',
+            group: 'resource_parameter',
+            help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.'
+        }
+        filter_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task filter.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        filter_time_hr: {
+            description: 'Walltime (h) required for task filter.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        filter_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task filter.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        bam2ta_cpu: {
+            description: 'Number of cores for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'Task bam2ta converts filtered/deduped BAM into TAG-ALIGN (6-col BED) format.'
+        }
+        bam2ta_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        bam2ta_time_hr: {
+            description: 'Walltime (h) required for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
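+        # Rough worked example of the factor entries above (hypothetical
+        # input size): with chip.align_bowtie2_mem_factor == 0.15 and 40GB of
+        # input FASTQs, task align would be given about 0.15 * 40GB = 6GB of
+        # memory.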
+        bam2ta_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        spr_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        spr_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        jsd_cpu: {
+            description: 'Number of cores for task jsd.',
+            group: 'resource_parameter',
+            help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.'
+        }
+        jsd_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        jsd_time_hr: {
+            description: 'Walltime (h) required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        jsd_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        xcor_cpu: {
+            description: 'Number of cores for task xcor.',
+            group: 'resource_parameter',
+            help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
+        }
+        xcor_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        xcor_time_hr: {
+            description: 'Walltime (h) required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        xcor_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        subsample_ctl_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        subsample_ctl_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        call_peak_cpu: {
+            description: 'Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.',
+            group: 'resource_parameter',
+            help: 'Task call_peak calls peaks on TAG-ALIGNs by using SPP/MACS2 peak caller. MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.'
+        }
+        call_peak_spp_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_macs2_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_time_hr: {
+            description: 'Walltime (h) required for task call_peak.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        call_peak_spp_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        call_peak_macs2_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        macs2_signal_track_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        macs2_signal_track_time_hr: {
+            description: 'Walltime (h) required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        macs2_signal_track_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        align_trimmomatic_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task align.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.'
+        }
+        filter_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task filter.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.'
+        }
+        gc_bias_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task gc_bias.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.'
+        }
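+        # e.g. (hypothetical value) "chip.filter_picard_java_heap": "4G"
+        # would cap Picard MarkDuplicates at -Xmx4G instead of the
+        # 90%-of-task-memory default described above.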
+    }
+    RuntimeEnvironment runtime_environment = {
+        'docker': docker, 'singularity': singularity, 'conda': conda
+    }
+    RuntimeEnvironment runtime_environment_spp = {
+        'docker': docker, 'singularity': singularity, 'conda': conda_spp
+    }
+    RuntimeEnvironment runtime_environment_macs2 = {
+        'docker': docker, 'singularity': singularity, 'conda': conda_macs2
+    }
+
+    # read genome data and paths
+    if ( defined(genome_tsv) ) {
+        call read_genome_tsv { input:
+            genome_tsv = genome_tsv,
+            runtime_environment = runtime_environment
+        }
+    }
+    File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa])
+    File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar
+        else read_genome_tsv.bwa_idx_tar
+    File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar])
+    File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz])
+    String gensz_ = select_first([gensz, read_genome_tsv.gensz])
+    File? blacklist1_ = if defined(blacklist) then blacklist
+        else read_genome_tsv.blacklist
+    File? blacklist2_ = if defined(blacklist2) then blacklist2
+        else read_genome_tsv.blacklist2
+    # merge multiple blacklists
+    # two blacklists can have different number of columns (3 vs 6)
+    # so we limit merged blacklist's columns to 3
+    Array[File] blacklists = select_all([blacklist1_, blacklist2_])
+    if ( length(blacklists) > 1 ) {
+        call pool_ta as pool_blacklist { input:
+            tas = blacklists,
+            col = 3,
+            runtime_environment = runtime_environment
+        }
+    }
+    File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled
+        else if length(blacklists) > 0 then blacklists[0]
+        else blacklist2_
+    String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name])
+    String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name])
+    String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)])
+
+    ### temp vars (do not define these)
+    String aligner_ = if defined(custom_align_py) then 'custom' else aligner
+    String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp'])
+        else select_first([peak_caller, 'macs2'])
+    String peak_type_ = if peak_caller_=='spp' then 'regionPeak'
+        else 'narrowPeak'
+    Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only
+    String idr_rank_ = if peak_caller_=='spp' then 'signal.value'
+        else if peak_caller_=='macs2' then 'p.value'
+        else 'p.value'
+    Int cap_num_peak_spp = 300000
+    Int cap_num_peak_macs2 = 500000
+    Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp])
+        else select_first([cap_num_peak, cap_num_peak_macs2])
+    Int mapq_thresh_ = mapq_thresh
+    Boolean enable_xcor_ = if pipeline_type=='control' then false else true
+    Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track
+    Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd
+    Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias
+    Boolean align_only_ = if pipeline_type=='control' then true else align_only
+
+    Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor
+        else align_bwa_mem_factor
+    Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor
+        else align_bwa_disk_factor
+    Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor
+        else call_peak_macs2_mem_factor
+    Float call_peak_disk_factor_ = if peak_caller_ =='spp' then call_peak_spp_disk_factor
+        else call_peak_macs2_disk_factor
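+    # Worked example of the derived defaults above (tracing the code, not new
+    # behavior): with pipeline_type == 'tf' and no explicit overrides,
+    # peak_caller_ == 'spp', peak_type_ == 'regionPeak', enable_idr == true,
+    # idr_rank_ == 'signal.value' and cap_num_peak_ == 300000; with
+    # pipeline_type == 'histone' the same chain yields macs2/narrowPeak with
+    # cap_num_peak_ == 500000 and IDR disabled.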
+
+    # temporary 2-dim fastqs array [rep_id][merge_id]
+    Array[Array[File]] fastqs_R1 =
+        if length(fastqs_rep10_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1]
+        else if length(fastqs_rep9_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1]
+        else if length(fastqs_rep8_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1]
+        else if length(fastqs_rep7_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1]
+        else if length(fastqs_rep6_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1]
+        else if length(fastqs_rep5_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1]
+        else if length(fastqs_rep4_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1]
+        else if length(fastqs_rep3_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1]
+        else if length(fastqs_rep2_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1]
+        else if length(fastqs_rep1_R1)>0 then
+            [fastqs_rep1_R1]
+        else []
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] fastqs_R2 =
+        [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2,
+        fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2]
+
+    # temporary 2-dim ctl fastqs array [rep_id][merge_id]
+    Array[Array[File]] ctl_fastqs_R1 =
+        if length(ctl_fastqs_rep10_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1]
+        else if length(ctl_fastqs_rep9_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1]
+        else if length(ctl_fastqs_rep8_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1]
+        else if length(ctl_fastqs_rep7_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1]
+        else if length(ctl_fastqs_rep6_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1]
+        else if length(ctl_fastqs_rep5_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1]
+        else if length(ctl_fastqs_rep4_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1]
+        else if length(ctl_fastqs_rep3_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1]
+        else if length(ctl_fastqs_rep2_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1]
+        else if length(ctl_fastqs_rep1_R1)>0 then
+            [ctl_fastqs_rep1_R1]
+        else []
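+    # For example, if only rep1 and rep2 FASTQs are defined, the chains above
+    # evaluate to [fastqs_rep1_R1, fastqs_rep2_R1]; the number of replicates
+    # is then derived from the R1 array, and the R2 arrays below are simply
+    # indexed with the same replicate index.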
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] ctl_fastqs_R2 =
+        [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2,
+        ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2]
+
+    # temporary variables to get number of replicates
+    # WDLic implementation of max(A,B,C,...)
+    Int num_rep_fastq = length(fastqs_R1)
+    Int num_rep_bam = if length(bams) 0 || num_ctl_fastq > 0) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) {
+        call raise_exception as error_wrong_aligner { input:
+            msg = 'Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) {
+        call raise_exception as error_use_bwa_mem_for_non_bwa { input:
+            msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) {
+        call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
+            msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) {
+        call raise_exception as error_custom_aligner { input:
+            msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) {
+        call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input:
+            msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_ratio_limit">0) for ' +
+                'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' +
+                'Automatic control subsampling is enabled by default. ' +
+                'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' +
+                'You can still use manual control subsampling ("chip.ctl_subsample_reads">0) since it is done ' +
+                'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_ctl > 0 ) {
+        call raise_exception as error_ctl_input_defined_in_control_mode { input:
+            msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_rep_fastq == 0 ) {
+        call raise_exception as error_ctl_fastq_input_required_for_control_mode { input:
+            msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. Pipeline will recognize them as control FASTQs.',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # align each replicate
+    scatter(i in range(num_rep)) {
+        # to override endedness definition for individual replicate
+        # paired_end will override paired_ends[i]
+        Boolean paired_end_ = if !defined(paired_end) && i0
+        Boolean has_output_of_align = i0
+        Boolean has_output_of_align_ctl = i1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta { input :
+            tas = ta_,
+            prefix = 'rep',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr1 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 1
+        call pool_ta as pool_ta_pr1 { input :
+            tas = spr.ta_pr1,
+            prefix = 'rep-pr1',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr2 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 2
+        call pool_ta as pool_ta_pr2 { input :
+            tas = spr.ta_pr2,
+            prefix = 'rep-pr2',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are CTL TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl
+    if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta as pool_ta_ctl { input :
+            tas = ctl_ta_,
+            prefix = 'ctl',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled)
+    if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) {
+        call count_signal_track as count_signal_track_pooled { input :
+            ta = pool_ta.ta_pooled,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep
+    if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) {
+        # fingerprint and JS-distance plot
+        call jsd { input :
+            nodup_bams = nodup_bam_,
+            ctl_bams = ctl_nodup_bam_, # use first control only
+            blacklist = blacklist_,
+            mapq_thresh = mapq_thresh_,
+
+            cpu = jsd_cpu,
+            mem_factor = jsd_mem_factor,
+            time_hr = jsd_time_hr,
+            disk_factor = jsd_disk_factor,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep
+        && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0
+    if ( has_all_input_of_choose_ctl && !align_only_ ) {
+        # choose appropriate control for each exp IP replicate
+        # outputs:
+        # choose_ctl.idx : control replicate index for each exp replicate
+        # -1 means pooled ctl replicate
+        call choose_ctl { input:
+            tas = ta_,
+            ctl_tas = ctl_ta_,
+            ta_pooled = pool_ta.ta_pooled,
+            ctl_ta_pooled = pool_ta_ctl.ta_pooled,
+            always_use_pooled_ctl = always_use_pooled_ctl,
+            ctl_depth_ratio = ctl_depth_ratio,
+            ctl_depth_limit = ctl_depth_limit,
+            exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    scatter(i in range(num_rep)) {
+        # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]]
+        # chosen_ctl_ta_id
+        # >=0: control TA index (this means that control TA with this index exists)
+        # -1: use pooled control
+        # -2: there is no control
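+        # Example of the id convention above (hypothetical outcome): with two
+        # experiment replicates and two controls, choose_ctl might emit
+        # chosen_ctl_ta_ids == [0, 1] (matched controls), [-1, -1] (pooled
+        # control for both), or [-2, -2] when no control exists.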
+        Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then
+            select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2
+        Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then
+            select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0
+        Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false
+            else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0]
+            else ctl_paired_end_[chosen_ctl_ta_id]
+
+        if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) {
+            call subsample_ctl { input:
+                ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled
+                    else ctl_ta_[ chosen_ctl_ta_id ],
+                subsample = chosen_ctl_ta_subsample,
+                paired_end = chosen_ctl_paired_end,
+                mem_factor = subsample_ctl_mem_factor,
+                disk_factor = subsample_ctl_disk_factor,
+                runtime_environment = runtime_environment
+            }
+        }
+        Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then []
+            else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ]
+            else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ]
+            else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ]
+    }
+    Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then
+        select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0
+
+    # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int]))
+    Array[Int] fraglen_tmp = select_all(fraglen_)
+
+    # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks
+    scatter(i in range(num_rep)) {
+        Boolean has_input_of_call_peak = defined(ta_[i])
+        Boolean has_output_of_call_peak = i 1 ) {
+    # rounded mean of fragment length, which will be used for
+    # 1) calling peaks for pooled true/pseudo replicates
+    # 2) calculating FRiP
+    call rounded_mean as fraglen_mean { input :
+        ints = fraglen_tmp,
+        runtime_environment = runtime_environment
+    }
+    # }
+
+    if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) {
+        call subsample_ctl as subsample_ctl_pooled { input:
+            ta = if num_ctl < 2 then ctl_ta_[0]
+                else pool_ta_ctl.ta_pooled,
+            subsample = chosen_ctl_ta_pooled_subsample,
+            paired_end = ctl_paired_end_[0],
+            mem_factor = subsample_ctl_mem_factor,
+            disk_factor = subsample_ctl_disk_factor,
+            runtime_environment = runtime_environment
+        }
+    }
+    # actually not an array
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] + else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ] + else if num_ctl < 2 then [ ctl_ta_[0] ] + else [ pool_ta_ctl.ta_pooled ] + + Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) + Boolean has_output_of_call_peak_pooled = defined(peak_pooled) + if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + # call peaks on pooled replicate + # always call peaks for pooled replicate to get signal tracks + call call_peak as call_peak_pooled { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled + else call_peak_pooled.peak + + # macs2 signal track for pooled rep + if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + call macs2_signal_track as macs2_signal_track_pooled { input : + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_mean.rounded_mean, + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2 + } + } + + Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) + Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) + if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) { + # call peaks on 1st pooled pseudo replicates + call call_peak as call_peak_ppr1 { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? 
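+    # resume pattern: if peak_ppr1 was supplied as a pipeline input, the call
+    # above is skipped (has_output_of_call_peak_ppr1) and the given file is
+    # passed through; otherwise the freshly called peak is used.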
peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 + else call_peak_ppr1.peak + + Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) + Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) + if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) { + # call peaks on 2nd pooled pseudo replicates + call call_peak as call_peak_ppr2 { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 + else call_peak_ppr2.peak + + # do IDR/overlap on all pairs of two replicates (i,j) + # where i and j are zero-based indices and 0 <= i < j < num_rep + scatter( pair in cross(range(num_rep),range(num_rep)) ) { + # pair.left = 0-based index of 1st replicate + # pair.right = 0-based index of 2nd replicate + File? peak1_ = peak_[pair.left] + File? peak2_ = peak_[pair.right] + if ( !align_only_ && pair.left 1 ) { + # Naive overlap on pooled pseudo replicates + call overlap as overlap_ppr { input : + prefix = 'pooled-pr1_vs_pooled-pr2', + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment + } + } + + if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) { + # IDR on pooled pseduo replicates + call idr as idr_ppr { input : + prefix = 'pooled-pr1_vs_pooled-pr2', + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + idr_thresh = idr_thresh, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + rank = idr_rank_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment + } + } + + # reproducibility QC for overlap/IDR peaks + if ( !align_only_ && !true_rep_only && num_rep > 0 ) { + # reproducibility QC for overlapping peaks + call reproducibility as reproducibility_overlap { input : + prefix = 'overlap', + peaks = select_all(overlap.bfilt_overlap_peak), + peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [], + peak_ppr = overlap_ppr.bfilt_overlap_peak, + peak_type = peak_type_, + chrsz = chrsz_, + runtime_environment = runtime_environment + } + } + + if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) { + # reproducibility QC for IDR peaks + call reproducibility as reproducibility_idr { input : + prefix = 'idr', + peaks = select_all(idr.bfilt_idr_peak), + peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [], + peak_ppr = idr_ppr.bfilt_idr_peak, + peak_type = peak_type_, + chrsz = chrsz_, + 
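+            # these peaks come from the cross(range(num_rep), range(num_rep))
+            # scatter above, kept only when pair.left < pair.right, so for
+            # num_rep = 3 the pair order is (0,1), (0,2), (1,2).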
runtime_environment = runtime_environment + } + } + + # Generate final QC report and JSON + call qc_report { input : + pipeline_ver = pipeline_ver, + title = title, + description = description, + genome = genome_name_, + paired_ends = paired_end_, + ctl_paired_ends = ctl_paired_end_, + pipeline_type = pipeline_type, + aligner = aligner_, + no_dup_removal = no_dup_removal, + peak_caller = peak_caller_, + cap_num_peak = cap_num_peak_, + idr_thresh = idr_thresh, + pval_thresh = pval_thresh, + xcor_trim_bp = xcor_trim_bp, + xcor_subsample_reads = xcor_subsample_reads, + + samstat_qcs = select_all(align.samstat_qc), + nodup_samstat_qcs = select_all(filter.samstat_qc), + dup_qcs = select_all(filter.dup_qc), + lib_complexity_qcs = select_all(filter.lib_complexity_qc), + xcor_plots = select_all(xcor.plot_png), + xcor_scores = select_all(xcor.score), + + ctl_samstat_qcs = select_all(align_ctl.samstat_qc), + ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc), + ctl_dup_qcs = select_all(filter_ctl.dup_qc), + ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), + + jsd_plot = jsd.plot, + jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [], + + frip_qcs = select_all(call_peak.frip_qc), + frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), + frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc), + frip_qc_pooled = call_peak_pooled.frip_qc, + frip_qc_ppr1 = call_peak_ppr1.frip_qc, + frip_qc_ppr2 = call_peak_ppr2.frip_qc, + + idr_plots = select_all(idr.idr_plot), + idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [], + idr_plot_ppr = idr_ppr.idr_plot, + frip_idr_qcs = select_all(idr.frip_qc), + frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [], + frip_idr_qc_ppr = idr_ppr.frip_qc, + frip_overlap_qcs = select_all(overlap.frip_qc), + frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [], + frip_overlap_qc_ppr = overlap_ppr.frip_qc, + idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, + overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, + + gc_plots = select_all(gc_bias.gc_plot), + + peak_region_size_qcs = select_all(call_peak.peak_region_size_qc), + peak_region_size_plots = select_all(call_peak.peak_region_size_plot), + num_peak_qcs = select_all(call_peak.num_peak_qc), + + idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc, + idr_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc, + + overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc, + overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc, + + runtime_environment = runtime_environment + } + + output { + File report = qc_report.report + File qc_json = qc_report.qc_json + Boolean qc_json_ref_match = qc_report.qc_json_ref_match + } +} + +task align { + input { + Array[File] fastqs_R1 # [merge_id] + Array[File] fastqs_R2 + File? ref_fa + Int? trim_bp # this is for R1 only + Int crop_length + Int crop_length_tol + String? trimmomatic_phred_score_format + + String aligner + + String mito_chr_name + Int? multimapping + File? custom_align_py + File? idx_tar # reference index tar + Boolean paired_end + Boolean use_bwa_mem_for_pe + Int bwa_mem_read_len_limit + Boolean use_bowtie2_local_mode + + String? 
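+        # when unset, the Trimmomatic Java heap defaults to 90% of this
+        # task's memory: round(mem_gb * trimmomatic_java_heap_factor) + 'G'
+        # (see the command block below).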
trimmomatic_java_heap + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") + Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) + + Float trimmomatic_java_heap_factor = 0.9 + Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2]) + else transpose([fastqs_R1]) + command { + set -e + + # check if pipeline dependencies can be found + if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] + then + echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 + exit 3 + fi + python3 $(which encode_task_merge_fastq.py) \ + ${write_tsv(tmp_fastqs)} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--nth ' + cpu} + + if [ -z '${trim_bp}' ]; then + SUFFIX= + else + SUFFIX=_trimmed + python3 $(which encode_task_trim_fastq.py) \ + R1/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R1$SUFFIX + if [ '${paired_end}' == 'true' ]; then + python3 $(which encode_task_trim_fastq.py) \ + R2/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R2$SUFFIX + fi + fi + if [ '${crop_length}' == '0' ]; then + SUFFIX=$SUFFIX + else + NEW_SUFFIX="$SUFFIX"_cropped + python3 $(which encode_task_trimmomatic.py) \ + --fastq1 R1$SUFFIX/*.fastq.gz \ + ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + --crop-length ${crop_length} \ + --crop-length-tol "${crop_length_tol}" \ + ${'--phred-score-format ' + trimmomatic_phred_score_format } \ + --out-dir-R1 R1$NEW_SUFFIX \ + ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \ + ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \ + ${'--nth ' + cpu} + SUFFIX=$NEW_SUFFIX + fi + + if [ '${aligner}' == 'bwa' ]; then + python3 $(which encode_task_bwa.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \ + ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + + elif [ '${aligner}' == 'bowtie2' ]; then + python3 $(which encode_task_bowtie2.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${'--multimapping ' + multimapping} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bowtie2_local_mode then '--local' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + else + python3 ${custom_align_py} \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_align.py) \ + R1$SUFFIX/*.fastq.gz $(ls *.bam) \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + rm -rf R1 R2 R1$SUFFIX R2$SUFFIX + } + output { + File bam = glob('*.bam')[0] + File bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File read_len_log = glob('*.read_length.txt')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} 
SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_filter.py) \ + ${bam} \ + ${if paired_end then '--paired-end' else ''} \ + --multimapping 0 \ + ${'--dup-marker ' + dup_marker} \ + ${'--mapq-thresh ' + mapq_thresh} \ + --filter-chrs ${sep=' ' filter_chrs} \ + ${'--chrsz ' + chrsz} \ + ${if no_dup_removal then '--no-dup-removal' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + + if [ '${redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ${'--ref-fa ' + ref_fa} \ + '--delete-original-bam' + fi + } + output { + File nodup_bam = glob('*.bam')[0] + File nodup_bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File dup_qc = glob('*.dup.qc')[0] + File lib_complexity_qc = glob('*.lib_complexity.qc')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task bam2ta { + input { + File? bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_bam2ta.py) \ + ${bam} \ + --disable-tn5-shift \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + } + output { + File ta = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task spr { + input { + File? 
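+        # spr = self-pseudoreplication: the input tagAlign is split into two
+        # random halves (outputs ta_pr1/ta_pr2);
+        # pseudoreplication_random_seed keeps the split reproducible.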
ta + Boolean paired_end + Int pseudoreplication_random_seed + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_spr.py) \ + ${ta} \ + ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \ + ${if paired_end then '--paired-end' else ''} + } + output { + File ta_pr1 = glob('*.pr1.tagAlign.gz')[0] + File ta_pr2 = glob('*.pr2.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task pool_ta { + input { + Array[File?] tas + Int? col # number of columns in pooled TA + String? prefix # basename prefix + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_pool_ta.py) \ + ${sep=' ' select_all(tas)} \ + ${'--prefix ' + prefix} \ + ${'--col ' + col} + } + output { + File ta_pooled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '8 GB' + time : 4 + disks : 'local-disk 100 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task xcor { + input { + File? ta + Boolean paired_end + String mito_chr_name + Int subsample # number of reads to subsample TAGALIGN + # this will be used for xcor only + # will not affect any downstream analysis + String? chip_seq_type + Int? exclusion_range_min + Int? exclusion_range_max + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 8.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_xcor.py) \ + ${ta} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--chip-seq-type ' + chip_seq_type} \ + ${'--exclusion-range-min ' + exclusion_range_min} \ + ${'--exclusion-range-max ' + exclusion_range_max} \ + ${'--subsample ' + subsample} \ + ${'--nth ' + cpu} + } + output { + File plot_pdf = glob('*.cc.plot.pdf')[0] + File plot_png = glob('*.cc.plot.png')[0] + File score = glob('*.cc.qc')[0] + File fraglen_log = glob('*.cc.fraglen.txt')[0] + Int fraglen = read_int(fraglen_log) + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task jsd { + input { + Array[File?] nodup_bams + Array[File?] ctl_bams + File? 
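+        # fingerprint / Jensen-Shannon distance QC on the filtered BAMs;
+        # only the first control BAM is used
+        # (select_first(ctl_bams) in the command below).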
blacklist + Int mapq_thresh + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_jsd.py) \ + ${sep=' ' select_all(nodup_bams)} \ + ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ + ${'--mapq-thresh '+ mapq_thresh} \ + ${'--blacklist '+ blacklist} \ + ${'--nth ' + cpu} + } + output { + File plot = glob('*.png')[0] + Array[File] jsd_qcs = glob('*.jsd.qc') + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. + Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ${sep=' ' select_all(tas)} \ + --ctl-tas ${sep=' ' select_all(ctl_tas)} \ + ${'--ta-pooled ' + ta_pooled} \ + ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ + ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ + ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ + ${'--ctl-depth-limit ' + ctl_depth_limit} \ + ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} + } + output { + File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] + File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] + File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + Float mem_gb = 8.0 + + command { + set -e + python3 $(which encode_task_count_signal_track.py) \ + ${ta} \ + ${'--chrsz ' + chrsz} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pos_bw = glob('*.positive.bigwig')[0] + File neg_bw = glob('*.negative.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? 
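+        # subsamples a control tagAlign selected by choose_ctl, e.g. when a
+        # control is far deeper than the experiment (see ctl_depth_limit and
+        # exp_ctl_depth_ratio_limit above).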
ta + Boolean paired_end + Int subsample + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + python3 $(which encode_task_subsample_ctl.py) \ + ${ta} \ + ${'--subsample ' + subsample} \ + ${if paired_end then '--paired-end' else ''} \ + } + output { + File ta_subsampled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + + File? blacklist # blacklist BED to filter raw peaks + String? regex_bfilt_peak_chr_name + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + + if [ '${peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + + elif [ '${peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ${sep=' ' select_all(tas)} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--fdr-thresh '+ fdr_thresh} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ${'--ta ' + tas[0]} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--peak-type ' + peak_type} \ + ${'--blacklist ' + blacklist} + } + output { + File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + # generated by post_call_peak py + File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = glob('*.frip.qc')[0] + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : if peak_caller == 'macs2' then 2 else cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. 
control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pval_bw = glob('*.pval.signal.bigwig')[0] + File fc_bw = glob('*.fc.signal.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_idr.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--idr-thresh ' + idr_thresh} \ + ${'--peak-type ' + peak_type} \ + --idr-rank ${rank} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File idr_plot = glob('*.txt.png')[0] + File idr_unthresholded_peak = glob('*.txt.gz')[0] + File idr_log = glob('*.idr*.log')[0] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? 
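+        # FRiP (fraction of reads in peaks) is computed only when ta is
+        # defined; otherwise the command touches a placeholder and frip_qc
+        # resolves to the 'null' file (see the output block below).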
fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_overlap.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + --nonamecheck \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_reproducibility.py) \ + ${sep=' ' peaks} \ + --peaks-pr ${sep=' ' peaks_pr} \ + ${'--peak-ppr '+ peak_ppr} \ + --prefix ${prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--chrsz ' + chrsz} + } + output { + File optimal_peak = glob('*optimal_peak.*.gz')[0] + File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] + File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] + File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] + File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] + File conservative_peak = glob('*conservative_peak.*.gz')[0] + File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] + File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] + File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] + File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] + File reproducibility_qc = glob('*reproducibility.qc')[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + + String? 
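+        # when unset, the Picard Java heap defaults to 90% of mem_gb
+        # (picard_java_heap_factor below), leaving headroom for non-heap
+        # overhead.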
picard_java_heap + + RuntimeEnvironment runtime_environment + } + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command { + set -e + python3 $(which encode_task_gc_bias.py) \ + ${'--nodup-bam ' + nodup_bam} \ + ${'--ref-fa ' + ref_fa} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + } + output { + File gc_plot = glob('*.gc_plot.png')[0] + File gc_log = glob('*.gc.txt')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 6 + disks : 'local-disk 250 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + + Array[File] gc_plots + + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + + File? 
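+        # optional reference qc.json: when supplied, the generated qc.json is
+        # compared against it and the result surfaces as the
+        # qc_json_ref_match output.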
qc_json_ref + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ${'--pipeline-ver ' + pipeline_ver} \ + ${"--title '" + sub(title,"'","_") + "'"} \ + ${"--desc '" + sub(description,"'","_") + "'"} \ + ${'--genome ' + genome} \ + ${'--multimapping ' + 0} \ + --paired-ends ${sep=' ' paired_ends} \ + --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ + --pipeline-type ${pipeline_type} \ + --aligner ${aligner} \ + ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ + --peak-caller ${peak_caller} \ + ${'--cap-num-peak ' + cap_num_peak} \ + --idr-thresh ${idr_thresh} \ + --pval-thresh ${pval_thresh} \ + --xcor-trim-bp ${xcor_trim_bp} \ + --xcor-subsample-reads ${xcor_subsample_reads} \ + --samstat-qcs ${sep='_:_' samstat_qcs} \ + --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ + --dup-qcs ${sep='_:_' dup_qcs} \ + --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ + --xcor-plots ${sep='_:_' xcor_plots} \ + --xcor-scores ${sep='_:_' xcor_scores} \ + --idr-plots ${sep='_:_' idr_plots} \ + --idr-plots-pr ${sep='_:_' idr_plots_pr} \ + --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ + ${'--jsd-plot ' + jsd_plot} \ + --jsd-qcs ${sep='_:_' jsd_qcs} \ + ${'--idr-plot-ppr ' + idr_plot_ppr} \ + --frip-qcs ${sep='_:_' frip_qcs} \ + --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ + --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ + ${'--frip-qc-pooled ' + frip_qc_pooled} \ + ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ + ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ + --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ + --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ + ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ + --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ + --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ + ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ + ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ + ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ + --gc-plots ${sep='_:_' gc_plots} \ + --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ + --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ + --num-peak-qcs ${sep='_:_' num_peak_qcs} \ + ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ + ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ + ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ + ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ + ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ + ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ + --out-qc-html qc.html \ + --out-qc-json qc.json \ + ${'--qc-json-ref ' + qc_json_ref} + } + output { + File report = glob('*qc.html')[0] + File qc_json = glob('*qc.json')[0] + Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +### workflow system tasks +task read_genome_tsv { + input { + File? genome_tsv + String? 
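+        # null_s is intentionally never set: the outputs below use
+        # `if size(...)==0 then null_s else read_string(...)` so that an
+        # empty TSV entry yields a true null String?.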
null_s + + RuntimeEnvironment runtime_environment + } + command <<< + echo "$(basename ~{genome_tsv})" > genome_name + # create empty files for all entries + touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 + touch mito_chr_name + touch regex_bfilt_peak_chr_name + + python <>> + output { + String? genome_name = read_string('genome_name') + String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa') + String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar') + String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar') + String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz') + String? gensz = if size('gensz')==0 then null_s else read_string('gensz') + String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist') + String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2') + String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name') + String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+' + else read_string('regex_bfilt_peak_chr_name') + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task rounded_mean { + input { + Array[Int] ints + + RuntimeEnvironment runtime_environment + } + command <<< + python <>> + output { + Int rounded_mean = read_int('tmp.txt') + } + runtime { + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task raise_exception { + input { + String msg + + RuntimeEnvironment runtime_environment + } + command { + echo -e "\n* Error: ${msg}\n" >&2 + exit 2 + } + output { + String error_msg = '${msg}' + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl b/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl new file mode 100644 index 000000000..281e31443 --- /dev/null +++ b/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl @@ -0,0 +1,7 @@ +## # Header +# regular comment will be left as is +## part of preamble + +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput +version 1.2 diff --git a/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl b/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl new file mode 100644 index 000000000..81faa4fa6 --- /dev/null +++ b/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl @@ -0,0 +1,165 @@ +## # Header +# regular comment will be left as is +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput + +## part of preamble +version 1.2 + +#@ except: MissingMetas +struct AStruct { + String member +} + +task a_task { + meta + # Here is a comment between `meta` and the parenthesis. 
+ { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input + # Here is a comment before the input. + { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + command <<< >>> + + output + # Here is a comment before the output. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct {} + } + + requirements + # This is a comment before the requirements. + { + container: "ubuntu:latest" + } + + hints { + max_cpu: 1 + } +} + +## These double-pound-sign comments +## should be converted to single-pound-sign comments. +workflow hello { + meta + # Here is a comment between `meta` and the parenthesis. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment. + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + call a_task { + } + + scatter (name in name_array) { + call say_task { greeting = greeting } + } + + if (some_condition_task) { + call a_task as task_two {} + } + + output + # Here is a comment before the output. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. 
+ AStruct some_other_struct = AStruct {} + } +} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl new file mode 100644 index 000000000..7c8de0324 --- /dev/null +++ b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -0,0 +1 @@ +version 1.0 diff --git a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl new file mode 100644 index 000000000..7e3333f0a --- /dev/null +++ b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl @@ -0,0 +1,106 @@ +version +1.0 +workflow +test_wf +{ +input +{ +SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { +noncanonical_motifs: 30, +GT_AG_and_CT_AC_motif: 12, +} +} +parameter_meta +{ +out_sj_filter_overhang_min: { +type: "SpliceJunctionMotifs", +label: "Minimum overhang required to support a splicing junction" +} +} +output +{ +SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min +String a = "friend" +Int b = 1 + 2 +String c = "Hello, ~{a}" +Map[String, Int] d = { "a": 0, "b": 1, "c": 2} +} +meta { +a: "hello" +b: 'world' +c: 5 +d: -0xf +e: 1.0e10 +f: -2. +g: true +h: false +i: null +j: { +a: [1, 2, 3], +b: ["hello", "world", "!"], +c: { +x: 1, +y: 2, +z: 3 +} +} +k: [ +{ +a: {}, +b: 0, +c: "", +d: '', +e: [], +}, +{ +x: [1.0, 2.0, 3.0] +} +] +} +call no_params +call with_params { input: a, b, c, d = 1 } +call qualified.name +call qualified.name { input: a = 1, b = 2, c = "3" } +call aliased as x +call aliased as x { input: } +call f after x after y +call f after x after y { input: a = [] } +call f as x after x +call f as x after x after y { input: name = "hello" } +call test_task as foo { +input: bowchicka = "wowwow" +} +if ( +true +) { + +call test_task after foo { +input: bowchicka = "bowchicka" +} +scatter (i in range(3)) { +call test_task as bar { +input: bowchicka = i * 42 +} +} +} + +} +task +test_task +{ +command <<<>>> +input { +String bowchicka +} +parameter_meta { +bowchicka: { +type: "String", +label: "Bowchicka" +} +} +} + +struct SpliceJunctionMotifs { +Int noncanonical_motifs +Int GT_AG_and_CT_AC_motif +} diff --git a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl new file mode 100644 index 000000000..6a2d1da70 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl @@ -0,0 +1,23 @@ +version 1.1 + +# fileA 1.1 +import # fileA 1.2 + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" # fileA 2.3 + # fileA 3.1 + as # fileA 3.2 + # fileA 4.1 + bar # fileA 4.2 + # fileA 5.1 + alias # fileA 5.2 + # fileA 6.1 + qux # fileA 6.2 + # fileA 7.1 + as # fileA 7.2 + # fileA 8.1 + Qux # fileA 8.2 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl new file mode 100644 index 000000000..1c32809f6 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# fileA 1.1 +import # fileA 1.2 +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" 
# fileA 2.3 +# fileA 3.1 +as # fileA 3.2 +# fileA 4.1 +bar # fileA 4.2 +# fileA 5.1 +alias # fileA 5.2 +# fileA 6.1 +qux # fileA 6.2 +# fileA 7.1 +as # fileA 7.2 +# fileA 8.1 +Qux # fileA 8.2 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl new file mode 100644 index 000000000..e23115af6 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -0,0 +1,12 @@ +version 1.0 + +import # fileA 1 + "fileA.wdl" # fileA 2 + as # fileA 3 + bar # fileA 4 + alias # fileA 5 + qux # fileA 6 + as # fileA 7 + Qux # fileA 8 +import "fileB.wdl" as foo # fileB +import "fileC.wdl" # fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl new file mode 100644 index 000000000..f633e72d8 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl @@ -0,0 +1,12 @@ +version 1.0 +import "fileB.wdl" as foo # fileB +workflow test {} +import "fileC.wdl" # fileC +import # fileA 1 +"fileA.wdl" # fileA 2 +as # fileA 3 +bar # fileA 4 +alias # fileA 5 +qux # fileA 6 +as # fileA 7 +Qux # fileA 8 diff --git a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl new file mode 100644 index 000000000..564a6c05f --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl @@ -0,0 +1,5 @@ +version 1.1 + +import "fileA.wdl" as bar alias cows as horses alias cats as dogs +import "fileB.wdl" as foo +import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl new file mode 100644 index 000000000..e69a1a727 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl @@ -0,0 +1,7 @@ + version 1.1 + + import "fileB.wdl" as foo + import "fileA.wdl" as bar alias cows as horses + alias cats as dogs + workflow test {} + import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl new file mode 100644 index 000000000..8b07048e9 --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -0,0 +1,23 @@ +version 1.1 + +# fileA 1 +import + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" + # fileA 3 + as + # fileA 4 + bar + # fileA 5 + alias + # fileA 6 + qux + # fileA 7 + as + # fileA 8 + Qux +# this comment belongs to fileB +import "fileB.wdl" as foo +# this comment belongs to fileC +import "fileC.wdl" diff --git a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl new file mode 100644 index 000000000..a27e7a4fc --- /dev/null +++ b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" +# this comment belongs to fileB +import "fileB.wdl" as foo +# fileA 1 +import +# fileA 2.1 +# fileA 2.2 
+"fileA.wdl" +# fileA 3 +as +# fileA 4 +bar +# fileA 5 +alias +# fileA 6 +qux +# fileA 7 +as +# fileA 8 +Qux diff --git a/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl b/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl new file mode 100644 index 000000000..6cd003333 --- /dev/null +++ b/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl @@ -0,0 +1,2 @@ +version # interrupt + 1.2 # how far should '1.2' be indented? diff --git a/backup/wdl-format-old/tests/format/interrupt_example/source.wdl b/backup/wdl-format-old/tests/format/interrupt_example/source.wdl new file mode 100644 index 000000000..30e667287 --- /dev/null +++ b/backup/wdl-format-old/tests/format/interrupt_example/source.wdl @@ -0,0 +1,10 @@ +version # interrupt +1.2 # how far should '1.2' be indented? + +workflow +# interrupt +test # should this be indented? +# interrupt +{ meta # interrupt +{ # how far should this bracket be indented? +}} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt b/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt new file mode 100644 index 000000000..335221306 --- /dev/null +++ b/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt @@ -0,0 +1,205 @@ +'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl +on the date 08-05-2024. +It was accompanied by the following license: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl b/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl new file mode 100644 index 000000000..a25a05879 --- /dev/null +++ b/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl @@ -0,0 +1,17 @@ +version 1.0 + +import "workflows/tasks/bedtools.wdl" +import "workflows/tasks/bowtie.wdl" +import "workflows/tasks/fastqc.wdl" +import "workflows/tasks/macs.wdl" +import "workflows/tasks/rose.wdl" +import "workflows/tasks/runspp.wdl" +import "workflows/tasks/samtools.wdl" +import "workflows/tasks/seaseq_util.wdl" as util +import "workflows/tasks/sicer.wdl" +import "workflows/tasks/sortbed.wdl" +import "workflows/tasks/sratoolkit.wdl" as sra +import "workflows/workflows/bamtogff.wdl" +import "workflows/workflows/mapping.wdl" +import "workflows/workflows/motifs.wdl" +import "workflows/workflows/visualization.wdl" as viz diff --git a/backup/wdl-format-old/tests/format/seaseq-case/source.wdl b/backup/wdl-format-old/tests/format/seaseq-case/source.wdl new file mode 100644 index 000000000..94c76656e --- /dev/null +++ b/backup/wdl-format-old/tests/format/seaseq-case/source.wdl @@ -0,0 +1,898 @@ +version 1.0 +import "workflows/tasks/fastqc.wdl" +import "workflows/tasks/bedtools.wdl" +import "workflows/tasks/bowtie.wdl" +import "workflows/tasks/samtools.wdl" +import "workflows/tasks/macs.wdl" +import "workflows/workflows/bamtogff.wdl" +import "workflows/tasks/sicer.wdl" +import "workflows/workflows/motifs.wdl" +import "workflows/tasks/rose.wdl" +import "workflows/tasks/seaseq_util.wdl" as util +import "workflows/workflows/visualization.wdl" as viz +import "workflows/workflows/mapping.wdl" +import "workflows/tasks/runspp.wdl" +import "workflows/tasks/sortbed.wdl" +import "workflows/tasks/sratoolkit.wdl" as sra + +workflow seaseq { + String pipeline_ver = 'v2.0.0' + + meta { + title: 'SEAseq Analysis' + summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline' + description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.' + version: '2.0.0' + details: { + citation: 'https://doi.org/10.1186/s12859-022-04588-z', + contactEmail: 'modupeore.adetunji@stjude.org', + contactOrg: "St Jude Children's Research Hospital", + contactUrl: "", + upstreamLicenses: "MIT", + upstreamUrl: 'https://github.com/stjude/seaseq', + whatsNew: [ + { + version: "2.0", + changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"] + } + ] + } + parameter_group: { + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .', + help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.' + }, + input_genomic_data: { + title: 'Input FASTQ data', + description: 'Genomic input files for experiment.', + help: 'Input one or more sample data and/or SRA identifiers.' + }, + analysis_parameter: { + title: 'Analysis parameter', + description: 'Analysis settings needed for experiment.', + help: 'Analysis settings; such output analysis file name.' + } + } + } + input { + # group: reference_genome + File reference + File? spikein_reference + File? blacklist + File gtf + Array[File]? bowtie_index + Array[File]? spikein_bowtie_index + Array[File]? motif_databases + + # group: input_genomic_data + Array[String]? sample_sraid + Array[File]? 
sample_fastq + + # group: analysis_parameter + String? results_name + Boolean run_motifs=true + + } + + parameter_meta { + reference: { + description: 'Reference FASTA file', + group: 'reference_genome', + patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] + } + blacklist: { + description: 'Blacklist file in BED format', + group: 'reference_genome', + help: 'If defined, blacklist regions listed are excluded after reference alignment.', + patterns: ["*.bed", "*.bed.gz"] + } + gtf: { + description: 'gene annotation file (.gtf)', + group: 'reference_genome', + help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', + patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] + } + bowtie_index: { + description: 'bowtie v1 index files (*.ebwt)', + group: 'reference_genome', + help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', + patterns: ["*.ebwt"] + } + motif_databases: { + description: 'One or more of the MEME suite motif databases (*.meme)', + group: 'reference_genome', + help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', + patterns: ["*.meme"] + } + sample_sraid: { + description: 'One or more sample SRA (Sequence Read Archive) run identifiers', + group: 'input_genomic_data', + help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', + example: 'SRR12345678' + } + sample_fastq: { + description: 'One or more sample FASTQs', + group: 'input_genomic_data', + help: 'Upload zipped FASTQ files.', + patterns: ["*.fq.gz", "*.fastq.gz"] + } + results_name: { + description: 'Experiment results custom name', + group: 'analysis_parameter', + help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', + example: 'AllMerge_mapped' + } + run_motifs: { + description: 'Perform Motif Analysis', + group: 'analysis_parameter', + help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', + example: true + } + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 1 ----------- ### +### ------ Pre-process Analysis Files ------ ### +### ---------------------------------------- ### + + # Process SRRs + if ( defined(sample_sraid) ) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [1] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([sample_sraid, string_sra]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { + input : + sra_id=eachsra, + cloud=false + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if ( !defined(bowtie_index) ) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { + input : + reference=reference + } + } + #2. 
Make sure indexes are six else build indexes + if ( defined(bowtie_index) ) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) + if ( length(int_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { + input : + reference=reference + } + } + } + Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [1] + if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + + #4. Make sure indexes are six else build indexes for Spike-in DNA + if ( defined(spikein_bowtie_index) ) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) + if ( length(int_spikein_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + } + Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input : + reference=reference + } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input : + reference=reference + } + + # Process FASTQs + if ( defined(sample_fastq) ) { + + Array[String] string_fastq = [1] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([sample_fastq, string_fastq]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 1 ---------------- ### +### ----------- B: remove Spike-IN reads ------------ ### +### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + + if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { + input : + inputfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' + } + call util.basicfastqstats as spikein_indv_bfs { + input : + fastqfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + call bowtie.spikein_SE as spikein_indv_map { + input : + fastqfile=eachfastq, + index_files=actual_spikein_bowtie_index, + 
metricsfile=spikein_indv_bfs.metrics_out, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 2 ---------------- ### +### ---- A: analysis if multiple FASTQs provided ---- ### +### ------------------------------------------------- ### + + if ( multi_fastq ) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { + input : + inputfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as indv_bfs { + input : + fastqfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping as indv_mapping { + input : + fastqfile=eachfastq, + index_files=actual_bowtie_index, + metricsfile=indv_bfs.metrics_out, + blacklist=blacklist, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as indv_bamfqc { + input : + inputfile=indv_mapping.sorted_bam, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as indv_runspp { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call bedtools.bamtobed as indv_bamtobed { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call util.evalstats as indv_summarystats { + input: + fastq_type="SEAseq Sample FASTQ", + bambed=indv_bamtobed.bedfile, + sppfile=indv_runspp.spp_out, + fastqczip=indv_fastqc.zipfile, + bamflag=indv_mapping.bam_stats, + rmdupflag=indv_mapping.mkdup_stats, + bkflag=indv_mapping.bklist_stats, + fastqmetrics=indv_bfs.metrics_out, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { + input: + htmlfiles=indv_summarystats.xhtml, + txtfiles=indv_summarystats.textfile, + default_location='SAMPLE', + outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' + } + + call samtools.mergebam { + input: + bamfiles=indv_mapping.sorted_bam, + metricsfiles=indv_bfs.metrics_out, + default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', + outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' + } + + call fastqc.fastqc as mergebamfqc { + input: + 
inputfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' + } + + call samtools.indexstats as mergeindexstats { + input: + bamfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + + if ( defined(blacklist) ) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([blacklist, string_blacklist]) + call bedtools.intersect as merge_rmblklist { + input : + fileA=mergebam.mergebam, + fileB=blacklist_file, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', + nooverlap=true + } + call samtools.indexstats as merge_bklist { + input : + bamfile=merge_rmblklist.intersect_out, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) + + call samtools.markdup as merge_markdup { + input : + bamfile=mergebam_afterbklist, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + + call samtools.indexstats as merge_mkdup { + input : + bamfile=merge_markdup.mkdupbam, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + } # end if length(fastqfiles) > 1: multi_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 2 ----------- ### +### -- B: analysis if one FASTQ provided --- ### +### ---------------------------------------- ### + + # if only one fastqfile is provided + if ( one_fastq ) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { + input : + inputfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as uno_bfs { + input : + fastqfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping { + input : + fastqfile=fastqfiles[0], + index_files=actual_bowtie_index, + metricsfile=uno_bfs.metrics_out, + blacklist=blacklist, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as uno_bamfqc { + input : + inputfile=mapping.sorted_bam, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as uno_runspp { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + + call bedtools.bamtobed as uno_bamtobed { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + } # end if length(fastqfiles) == 1: one_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 3 ----------- ### +### ----------- ChIP-seq analysis ---------- ### +### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate 
correct files for downstream analysis + File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) + + call macs.macs { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="auto", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' + } + + call util.addreadme { + input : + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' + } + + call macs.macs as all { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="all", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' + } + + call macs.macs as nomodel { + input : + bamfile=sample_bam, + nomodel=true, + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' + } + + call bamtogff.bamtogff { + input : + gtffile=gtf, + chromsizes=samtools_faidx.chromsizes, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' + } + + call bedtools.bamtobed as forsicerbed { + input : + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) + } + + call sicer.sicer { + input : + bedfile=forsicerbed.bedfile, + chromsizes=samtools_faidx.chromsizes, + genome_fraction=egs.genomefraction, + fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call rose.rose { + input : + gtffile=gtf, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + bedfile_auto=macs.peakbedfile, + bedfile_all=all.peakbedfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' + } + + call runspp.runspp { + input: + bamfile=sample_bam + } + + call util.peaksanno { + input : + gtffile=gtf, + bedfile=macs.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as all_peaksanno { + input : + gtffile=gtf, + bedfile=all.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=all.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as nomodel_peaksanno { + input : + gtffile=gtf, + bedfile=nomodel.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=nomodel.summitsfile, + 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as sicer_peaksanno { + input : + gtffile=gtf, + bedfile=sicer.scoreisland, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=macs.peakbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call util.flankbed { + input : + bedfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call motifs.motifs as flank { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=flankbed.flankbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + } + + call viz.visualization { + input: + wigfile=macs.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=macs.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizall { + input: + wigfile=all.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=all.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as viznomodel { + input: + wigfile=nomodel.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=nomodel.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizsicer { + input: + wigfile=sicer.wigfile, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call bedtools.bamtobed as finalbed { + input: + bamfile=sample_bam + } + + call sortbed.sortbed { + input: + bedfile=finalbed.bedfile + } + + call bedtools.intersect { + input: + fileA=macs.peakbedfile, + fileB=sortbed.sortbed_out, + countoverlap=true, + sorted=true + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 4 ----------- ### +### ---------- Summary Statistics ---------- ### +### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if ( one_fastq ) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type="SEAseq Sample FASTQ", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), + bamflag=mapping.bam_stats, + rmdupflag=mapping.mkdup_stats, + bkflag=mapping.bklist_stats, + fastqmetrics=uno_bfs.metrics_out, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + 
overallqc_html=uno_summarystats.xhtml, + overallqc_txt=uno_summarystats.textfile + } + } # end if one_fastq + + if ( multi_fastq ) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type="SEAseq Comprehensive", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([mergebamfqc.zipfile, string_qual]), + bamflag=mergeindexstats.flagstats, + rmdupflag=merge_mkdup.flagstats, + bkflag=merge_bklist.flagstats, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html=mergehtml.xhtml, + overallqc_html=merge_summarystats.xhtml, + sampleqc_txt=mergehtml.mergetxt, + overallqc_txt=merge_summarystats.textfile + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? 
scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? g_to_e_enhancers = rose.g_to_e_enhancers + File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? 
a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + + File? s_uno_statsfile = uno_summarystats.statsfile + File? s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + + File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) + File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) + } +} \ No newline at end of file diff --git a/ci/Cargo.toml b/ci/Cargo.toml index 17fb3540d..eab3dc404 100644 --- a/ci/Cargo.toml +++ b/ci/Cargo.toml @@ -15,3 +15,6 @@ clap.workspace = true reqwest = { workspace = true, features = ["blocking", "rustls-tls"] } toml.workspace = true toml_edit = { version = "0.22.21", features = ["serde"] } + +[lints] +workspace = true diff --git a/ci/src/main.rs b/ci/src/main.rs index ee4de15a1..d515a8cfc 100644 --- a/ci/src/main.rs +++ b/ci/src/main.rs @@ -48,7 +48,8 @@ use std::time::Duration; use clap::Parser; use toml_edit::DocumentMut; -// note that this list must be topologically sorted by dependencies +/// Crates names to publish. +// Note that this list must be topologically sorted by dependencies. const SORTED_CRATES_TO_PUBLISH: &[&str] = &[ "wdl-grammar", "wdl-ast", @@ -58,45 +59,70 @@ const SORTED_CRATES_TO_PUBLISH: &[&str] = &[ "wdl", ]; +/// Paths to ignore. const IGNORE_PATHS: &[&str] = &["target", "tests", "examples", "benches", "book", "docs"]; +/// An in-memory representation of a crate. #[derive(Debug, Clone)] struct Crate { + /// The manifest file. manifest: DocumentMut, + + /// The path to the manifest. manifest_path: PathBuf, + + /// The path to the changelog. changelog_path: Option, + + /// The name of the crate. name: String, + + /// The version of the crate. version: String, + + /// Whether the version should be bumped. should_bump: bool, } +/// The command line arguments. #[derive(Parser)] -struct Opts { +struct Args { + /// The subcommand. #[clap(subcommand)] - subcmd: SubCommand, + command: Subcommand, } +/// The subcommand to use. #[derive(Parser)] -enum SubCommand { +enum Subcommand { + /// Request to bump a crate/crates. Bump(Bump), + + /// Publishes a crate/crates. Publish(Publish), } +/// The arguments to the `bump` subcommand. #[derive(Parser)] struct Bump { + /// Whether or not the bump should be a patch version increase. #[clap(short, long)] patch: bool, + /// The list of crate names to bump. #[clap(short, long)] crates_to_bump: Vec, } +/// The arguments to the `publish` subcommand. #[derive(Parser)] struct Publish { + /// Whether or not to perform a dry-run of the publishing. #[clap(short, long)] dry_run: bool, } +/// The main function. 
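+//
+// Example invocations (a sketch: the subcommand and flag names follow from the
+// `Args`, `Bump`, and `Publish` definitions above, but the `-p ci` package
+// name is an assumption about this workspace):
+//
+//     cargo run -p ci -- bump --patch --crates-to-bump wdl-grammar
+//     cargo run -p ci -- publish --dry-run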
fn main() { let mut all_crates: Vec>> = Vec::new(); find_crates(".".as_ref(), &mut all_crates); @@ -108,9 +134,9 @@ fn main() { .collect::>(); all_crates.sort_by_key(|krate| publish_order.get(&krate.borrow().name[..])); - let opts = Opts::parse(); - match opts.subcmd { - SubCommand::Bump(Bump { + let opts = Args::parse(); + match opts.command { + Subcommand::Bump(Bump { patch, crates_to_bump, }) => { @@ -150,7 +176,7 @@ fn main() { .success() ); } - SubCommand::Publish(Publish { dry_run }) => { + Subcommand::Publish(Publish { dry_run }) => { // We have so many crates to publish we're frequently either // rate-limited or we run into issues where crates can't publish // successfully because they're waiting on the index entries of @@ -177,6 +203,7 @@ fn main() { } } +/// Finds crates in a particular directory. fn find_crates(dir: &Path, dst: &mut Vec>>) { if dir.join("Cargo.toml").exists() { if let Some(krate) = read_crate(&dir.join("Cargo.toml")) { @@ -195,6 +222,7 @@ fn find_crates(dir: &Path, dst: &mut Vec>>) { } } +/// Reads a crate from a manifest. fn read_crate(manifest_path: &Path) -> Option { let contents = fs::read_to_string(manifest_path).expect("failed to read manifest"); let mut manifest = @@ -223,6 +251,7 @@ fn read_crate(manifest_path: &Path) -> Option { }) } +/// Bumps the version of a crate. fn bump_version(krate: &Crate, crates: &[Rc>], patch: bool) { let mut new_manifest = krate.manifest.clone(); @@ -289,6 +318,7 @@ fn bump(version: &str, patch_bump: bool) -> String { } } +/// Publishes a crate. fn publish(krate: &Crate, dry_run: bool) -> bool { if !SORTED_CRATES_TO_PUBLISH.iter().any(|s| *s == krate.name) { return true; diff --git a/gauntlet/Cargo.toml b/gauntlet/Cargo.toml index b6b4bc111..58e2f8575 100644 --- a/gauntlet/Cargo.toml +++ b/gauntlet/Cargo.toml @@ -26,3 +26,6 @@ tracing.workspace = true tracing-subscriber.workspace = true anyhow.workspace = true codespan-reporting.workspace = true + +[lints] +workspace = true diff --git a/rustfmt.toml b/rustfmt.toml index 3e50da091..259cb6778 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -10,6 +10,6 @@ newline_style = "Unix" normalize_comments = true normalize_doc_attributes = true reorder_impl_items = true +style_edition = "2024" use_field_init_shorthand = true wrap_comments = true -style_edition = "2024" \ No newline at end of file diff --git a/wdl-analysis/Cargo.toml b/wdl-analysis/Cargo.toml index cf64ceef9..bda17aaa2 100644 --- a/wdl-analysis/Cargo.toml +++ b/wdl-analysis/Cargo.toml @@ -40,6 +40,9 @@ tempfile = { workspace = true } default = [] codespan = ["wdl-ast/codespan"] +[lints] +workspace = true + [[test]] name = "analysis" required-features = ["codespan"] diff --git a/wdl-analysis/src/analyzer.rs b/wdl-analysis/src/analyzer.rs index 19cc24cdd..80e8c8a4d 100644 --- a/wdl-analysis/src/analyzer.rs +++ b/wdl-analysis/src/analyzer.rs @@ -27,7 +27,6 @@ use tokio::sync::mpsc; use tokio::sync::oneshot; use url::Url; use walkdir::WalkDir; -use wdl_ast::AstNode; use wdl_ast::Diagnostic; use wdl_ast::SyntaxNode; use wdl_ast::Validator; diff --git a/wdl-analysis/src/engine.rs b/wdl-analysis/src/engine.rs new file mode 100644 index 000000000..3c942b122 --- /dev/null +++ b/wdl-analysis/src/engine.rs @@ -0,0 +1,792 @@ +//! Implementation of the analysis engine. 
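+//!
+//! A minimal usage sketch (illustrative only): the `wdl_analysis::AnalysisEngine`
+//! import path, the Tokio `macros` feature, and the local `workflow.wdl` file
+//! are assumptions, not part of this patch.
+//!
+//! ```no_run
+//! use std::path::Path;
+//!
+//! use wdl_analysis::AnalysisEngine;
+//!
+//! #[tokio::main]
+//! async fn main() -> anyhow::Result<()> {
+//!     // The engine must be created inside a Tokio runtime.
+//!     let engine = AnalysisEngine::new()?;
+//!
+//!     // Analyze a single document (a directory would be searched recursively).
+//!     for result in engine.analyze(Path::new("workflow.wdl")).await {
+//!         println!("`{}`: {} diagnostic(s)", result.id(), result.diagnostics().len());
+//!     }
+//!
+//!     // Wait for outstanding requests to complete before exiting.
+//!     engine.shutdown().await;
+//!     Ok(())
+//! }
+//! ```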
+ +use std::cell::RefCell; +use std::collections::HashSet; +use std::fmt; +use std::fs; +use std::path::Path; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use anyhow::anyhow; +use anyhow::bail; +use anyhow::Context; +use anyhow::Result; +use futures::stream::FuturesUnordered; +use futures::Future; +use futures::StreamExt; +use parking_lot::RwLock; +use petgraph::algo::has_path_connecting; +use petgraph::algo::DfsSpace; +use petgraph::graph::NodeIndex; +use petgraph::stable_graph::StableDiGraph; +use petgraph::visit::Visitable; +use petgraph::Direction; +use reqwest::Client; +use rowan::GreenNode; +use tokio::runtime::Handle; +use tokio::sync::mpsc::unbounded_channel; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio::sync::mpsc::UnboundedSender; +use tokio::sync::oneshot; +use tokio::task::JoinHandle; +use url::Url; +use wdl_ast::Ast; +use wdl_ast::AstToken; +use wdl_ast::Diagnostic; +use wdl_ast::SyntaxNode; +use wdl_ast::Validator; + +use crate::rayon::RayonHandle; +use crate::Document; +use crate::DocumentGraph; +use crate::DocumentId; +use crate::DocumentScope; + +/// The minimum number of milliseconds between analysis progress reports. +const MINIMUM_PROGRESS_MILLIS: u128 = 50; + +/// Represents the kind of analysis progress being reported. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ProgressKind { + /// The progress is for parsing documents. + Parsing, + /// The progress is for analyzing documents. + Analyzing, +} + +impl fmt::Display for ProgressKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Parsing => write!(f, "parsing"), + Self::Analyzing => write!(f, "analyzing"), + } + } +} + +/// Represents analysis state. +#[derive(Debug, Default)] + +pub(crate) struct State { + /// The document graph being built. + pub(crate) graph: DocumentGraph, + /// Represents dependency edges that, if they were added to the document + /// graph, would form a cycle. + /// + /// The first in the pair is the importing node and the second is the + /// imported node. + /// + /// This is used to break import cycles; when analyzing the document, if the + /// import exists in this set, a diagnostic will be added and the import + /// otherwise ignored. + pub(crate) cycles: HashSet<(NodeIndex, NodeIndex)>, + /// Space for DFS operations on the document graph. + space: DfsSpace as Visitable>::Map>, +} + +/// Represents the type for progress callbacks. +type ProgressCallback = dyn Fn(ProgressKind, usize, usize) + Send + Sync; + +/// Represents a request to perform analysis. +/// +/// This request is sent to the analysis queue for processing. +struct AnalysisRequest { + /// The identifiers of the documents to analyze. + documents: Vec>, + /// The progress callback to use for the request. + progress: Option>, + /// The sender for completing the analysis request. + completed: oneshot::Sender>, +} + +/// Represents the result of an analysis. +/// +/// Analysis results are cheap to clone. +#[derive(Debug, Clone)] +pub struct AnalysisResult { + /// The id of the analyzed document. + id: Arc, + /// The root node of the document. + /// + /// This is `None` if the document failed to be read. + root: Option, + /// The error encountered when trying to read the document. + /// + /// This is `None` if the document was read. + error: Option>, + /// The diagnostics for the document. + diagnostics: Arc<[Diagnostic]>, + /// The scope of the analyzed document. 
+ scope: Arc, +} + +impl AnalysisResult { + /// Constructs a new analysis result for the given document. + pub(crate) fn new(document: &Document) -> Self { + let state = document.state.completed(); + Self { + id: document.id.clone(), + root: document.root.clone(), + error: document.error.clone(), + diagnostics: state.diagnostics.clone(), + scope: state.scope.clone(), + } + } + + /// Gets the identifier of the document that was analyzed. + pub fn id(&self) -> &DocumentId { + &self.id + } + + /// Gets the root node of the document that was analyzed. + /// + /// Returns `None` if the document could not be read. + pub fn root(&self) -> Option<&GreenNode> { + self.root.as_ref() + } + + /// Gets the error if the document could not be read. + /// + /// Returns `None` if the document was read. + pub fn error(&self) -> Option<&anyhow::Error> { + self.error.as_deref() + } + + /// Gets the diagnostics associated with the document. + pub fn diagnostics(&self) -> &[Diagnostic] { + &self.diagnostics + } + + /// Gets the scope of the analyzed document. + pub fn scope(&self) -> &DocumentScope { + &self.scope + } +} + +/// Represents a Workflow Description Language (WDL) analysis engine. +/// +/// By default, analysis parses documents, performs validation checks, resolves +/// imports, and performs type checking. +/// +/// Each analysis operation is processed in order of request; however, the +/// individual parsing, resolution, and analysis of documents is performed +/// across a thread pool. +#[derive(Debug)] +pub struct AnalysisEngine { + /// The document graph. + graph: Arc>, + /// The sender for sending analysis requests. + sender: UnboundedSender, + /// The join handle of the queue task. + queue: JoinHandle<()>, +} + +impl AnalysisEngine { + /// Creates a new analysis engine using a default validator. + /// + /// The engine must be constructed from the context of a Tokio runtime. + pub fn new() -> Result { + let graph: Arc> = Default::default(); + let (sender, queue) = Self::spawn_analysis_queue_task(graph.clone(), None); + Ok(Self { + graph, + sender, + queue, + }) + } + + /// Creates a new analysis engine with the given function that produces a + /// validator to use. + /// + /// The provided function will be called once per worker thread to + /// initialize a thread-local validator. + /// + /// The engine must be constructed from the context of a Tokio runtime. + pub fn new_with_validator(validator: V) -> Result + where + V: Fn() -> Validator + Send + Sync + 'static, + { + let graph: Arc> = Default::default(); + let (sender, queue) = + Self::spawn_analysis_queue_task(graph.clone(), Some(Arc::new(validator))); + Ok(Self { + graph, + sender, + queue, + }) + } + + /// Analyzes the given file system path. + /// + /// If the path is a directory, the directory will be recursively searched + /// for files with a `.wdl` extension to analyze. + /// + /// Otherwise, a single file is analyzed. + pub async fn analyze(&self, path: &Path) -> Vec { + let documents = Self::find_documents(path).await; + if documents.is_empty() { + log::info!( + "no WDL documents were found for path `{path}`", + path = path.display() + ); + return Vec::new(); + } + + let (tx, rx) = oneshot::channel(); + self.sender + .send(AnalysisRequest { + documents, + progress: None, + completed: tx, + }) + .expect("failed to send analysis request"); + + rx.await.expect("failed to receive analysis results") + } + + /// Analyzes the given file system path and reports progress to the given + /// callback. 
+ /// + /// If the path is a directory, the directory will be recursively searched + /// for files with a `.wdl` extension to analyze. + /// + /// Otherwise, a single file is analyzed. + /// + /// Progress is reported to the provided callback function with a minimum + /// 50ms interval. + pub async fn analyze_with_progress(&self, path: &Path, progress: F) -> Vec + where + F: Fn(ProgressKind, usize, usize) + Send + Sync + 'static, + { + let documents = Self::find_documents(path).await; + if documents.is_empty() { + log::info!( + "no WDL documents were found for path `{path}`", + path = path.display() + ); + return Vec::new(); + } + + let (tx, rx) = oneshot::channel(); + self.sender + .send(AnalysisRequest { + documents, + progress: Some(Box::new(progress)), + completed: tx, + }) + .expect("failed to send analysis request"); + + rx.await.expect("failed to receive analysis results") + } + + /// Gets a previous analysis result for a file. + /// + /// Returns `None` if the file has not been analyzed yet. + pub fn result(&self, path: &Path) -> Option { + let id = DocumentId::try_from(path).ok()?; + let graph = self.graph.read(); + let index = graph.indexes.get(&id)?; + Some(AnalysisResult::new(&graph.inner[*index])) + } + + /// Shuts down the engine and waits for outstanding requests to complete. + pub async fn shutdown(self) { + drop(self.sender); + self.queue.await.expect("expected the queue to shut down"); + } + + /// Spawns the analysis queue task. + fn spawn_analysis_queue_task( + graph: Arc>, + validator: Option Validator + Send + Sync>>, + ) -> (UnboundedSender, JoinHandle<()>) { + let (tx, rx) = unbounded_channel::(); + let handle = tokio::spawn(Self::process_analysis_queue(graph, validator, rx)); + (tx, handle) + } + + /// Processes the analysis queue. + /// + /// The queue task processes analysis requests in the order of insertion + /// into the queue. + /// + /// It is also the only writer to the shared document graph. + async fn process_analysis_queue( + graph: Arc>, + validator: Option Validator + Send + Sync>>, + mut receiver: UnboundedReceiver, + ) { + log::info!("analysis queue has started"); + + let client = Client::default(); + while let Some(request) = receiver.recv().await { + log::info!( + "received request to analyze {count} document(s)", + count = request.documents.len() + ); + + // We start by populating the parse set with the request documents + // After each parse set completes, we search for imports to add to the parse set + // and continue until the parse set is empty; once the graph is built, we spawn + // analysis tasks to process every node in the graph. 
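+            //
+            // In outline, the loop below implements this worklist fixpoint
+            // (pseudocode, for orientation only):
+            //
+            //     parse_set = requested documents
+            //     while parse_set is not empty:
+            //         parsed    = parse parse_set in parallel (rayon tasks)
+            //         parse_set = imports of parsed docs not yet in the graph
+            //
+            // Each round parses only newly discovered imports, so the loop
+            // terminates once the transitive import closure has been built.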
+ let start = Instant::now(); + let mut state = State::default(); + let mut parse_set = request.documents.into_iter().collect::>(); + let mut requested = true; + let handle = Handle::current(); + while !parse_set.is_empty() { + let tasks = parse_set + .iter() + .map(|id| { + Self::spawn_parse_task(&handle, &client, &validator, id.clone(), requested) + }) + .collect::>(); + + // The remaining files to parse were not part of the request + requested = false; + + let parsed = Self::await_with_progress::<_, _, Vec<_>>( + ProgressKind::Parsing, + tasks, + &request.progress, + ) + .await; + + parse_set.clear(); + (state, parse_set) = Self::add_import_dependencies(state, parsed, parse_set).await; + } + + let total = state.graph.inner.node_count(); + let state = Self::spawn_analysis_tasks(state, &request.progress).await; + + // Spawn a task for merging the graph as this takes a lock + let graph = graph.clone(); + let results = RayonHandle::spawn(move || { + log::info!("merging document graphs"); + let mut graph = graph.write(); + graph.merge(state.graph) + }) + .await; + + log::info!( + "analysis request completed with {total} document(s) analyzed in {elapsed:?}", + elapsed = start.elapsed() + ); + + request + .completed + .send(results) + .expect("failed to send analysis results"); + } + + log::info!("analysis queue has shut down"); + } + + /// Finds documents for the given path. + /// + /// If the path is a directory, it is searched for `.wdl` files. + /// + /// Otherwise, returns a single identifier for the given path. + async fn find_documents(path: &Path) -> Vec> { + if path.is_dir() { + let pattern = format!("{path}/**/*.wdl", path = path.display()); + return RayonHandle::spawn(move || { + let options = glob::MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: true, + }; + + match glob::glob_with(&pattern, options) { + Ok(paths) => paths + .filter_map(|p| match p { + Ok(path) => Some(Arc::new(DocumentId::try_from(path.as_path()).ok()?)), + Err(e) => { + log::error!("error while searching for WDL documents: {e}"); + None + } + }) + .collect(), + Err(e) => { + log::error!("error while searching for WDL documents: {e}"); + Vec::new() + } + } + }) + .await; + } + + DocumentId::try_from(path) + .map(|id| vec![Arc::new(id)]) + .unwrap_or_default() + } + + /// Awaits the given set of futures while providing progress to the given + /// callback. + async fn await_with_progress( + kind: ProgressKind, + tasks: FuturesUnordered, + progress: &Option>, + ) -> C + where + T: Future, + C: Extend + Default, + { + if tasks.is_empty() { + return Default::default(); + } + + let total = tasks.len(); + if let Some(progress) = &progress { + progress(kind, 0, total); + } + + let mut completed = 0; + let mut last_progress = Instant::now(); + let collection = tasks + .map(|r| { + completed += 1; + + if let Some(progress) = progress { + let now = Instant::now(); + if completed < total + && (now - last_progress).as_millis() > MINIMUM_PROGRESS_MILLIS + { + log::info!("{completed} out of {total} {kind} task(s) have completed"); + last_progress = now; + progress(kind, completed, total); + } + } + + r + }) + .collect() + .await; + + log::info!("{total} {kind} task(s) have completed"); + if let Some(progress) = &progress { + progress(kind, total, total); + } + + collection + } + + /// Spawns a parse task on a rayon thread. 
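+    //
+    // Each rayon worker thread lazily builds its own `Validator` in the
+    // `thread_local!` below: the first parse task on a worker invokes the
+    // user-supplied factory (or falls back to `Validator::default()`), and
+    // every later task on that thread reuses the cached instance through
+    // `get_or_insert_with`. This avoids sharing one validator across threads.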
+ fn spawn_parse_task( + handle: &Handle, + client: &Client, + validator: &Option Validator + Send + Sync>>, + id: Arc, + requested: bool, + ) -> RayonHandle { + thread_local! { + static VALIDATOR: RefCell> = const { RefCell::new(None) }; + } + + let handle = handle.clone(); + let client = client.clone(); + let validator = validator.clone(); + RayonHandle::spawn(move || { + VALIDATOR.with_borrow_mut(|v| { + let validator = v.get_or_insert_with(|| validator.map(|v| v()).unwrap_or_default()); + match Self::parse(&handle, &client, Some(validator), &id) { + Ok((root, diagnostics)) => { + Document::from_parse(id, root, diagnostics, requested) + } + Err(e) => { + log::warn!("{e:#}"); + Document::from_error(id, e, requested) + } + } + }) + }) + } + + /// Parses the given document by URI. + /// + /// If the URI is `http` or `https` scheme, it fetches the source from the + /// network. + /// + /// If the URI is `file` scheme, it reads the file from the local file + /// system. + /// + /// Returns the root node and diagnostics upon success or a single document + /// if there was a problem with accessing the document's source. + fn parse( + tokio: &Handle, + client: &Client, + validator: Option<&mut Validator>, + id: &DocumentId, + ) -> Result<(GreenNode, Vec)> { + let source = match id { + DocumentId::Path(path) => fs::read_to_string(path)?, + DocumentId::Uri(uri) => match uri.scheme() { + "https" | "http" => Self::download_source(tokio, client, uri)?, + "file" => { + let path = uri + .to_file_path() + .map_err(|_| anyhow!("invalid file URI `{uri}`"))?; + log::info!("reading document `{path}`", path = path.display()); + fs::read_to_string(&path)? + } + scheme => { + bail!("unsupported URI scheme `{scheme}`"); + } + }, + }; + + let (node, diagnostics) = Self::parse_source(id, &source, validator); + Ok((node, diagnostics)) + } + + /// Parses the given source and validates the result with the given + /// validator. + fn parse_source( + id: &DocumentId, + source: &str, + validator: Option<&mut Validator>, + ) -> (GreenNode, Vec) { + let start = Instant::now(); + let (document, mut diagnostics) = wdl_ast::Document::parse(source); + + if diagnostics.is_empty() { + if let Some(validator) = validator { + diagnostics.extend(validator.validate(&document).err().unwrap_or_default()); + } + } + + log::info!("parsing of `{id}` completed in {:?}", start.elapsed()); + (document.syntax().green().into(), diagnostics) + } + + /// Downloads the source of a `http` or `https` scheme URI. + /// + /// This makes a request on the provided tokio runtime to download the + /// source. + fn download_source(tokio: &Handle, client: &Client, uri: &Url) -> Result { + /// The timeout for downloading the source, in seconds. + const TIMEOUT_IN_SECS: u64 = 30; + + log::info!("downloading source from `{uri}`"); + + // TODO: we should be caching these responses on disk somewhere + tokio.block_on(async { + let resp = client + .get(uri.as_str()) + .timeout(Duration::from_secs(TIMEOUT_IN_SECS)) + .send() + .await?; + + let code = resp.status(); + if !code.is_success() { + bail!("server returned HTTP status {code}"); + } + + resp.text().await.context("failed to read response body") + }) + } + + /// Adds import dependencies of parsed documents to the state. + /// + /// This will add empty nodes to the graph for any missing imports and + /// populate the parse set with documents that need to be parsed. 
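+    //
+    // Cycle handling in the function below: before a dependency edge is added,
+    // `has_path_connecting` (reusing the `DfsSpace` scratch buffer in `State`)
+    // checks whether the importing document's node already reaches the
+    // imported node. If it does, adding the edge would close a cycle, so the
+    // pair is recorded in `State::cycles` for a later diagnostic instead of
+    // being inserted into the graph.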
+ /// Adds import dependencies of parsed documents to the state.
+ ///
+ /// This will add empty nodes to the graph for any missing imports and
+ /// populate the parse set with documents that need to be parsed.
+ async fn add_import_dependencies(
+ mut state: State,
+ parsed: Vec<Document>,
+ mut parse_set: HashSet<Arc<DocumentId>>,
+ ) -> (State, HashSet<Arc<DocumentId>>) {
+ RayonHandle::spawn(move || {
+ for document in parsed {
+ // Add the newly parsed document to the graph; if the document was previously
+ // added as an import dependency, it is replaced with the newly parsed document
+ let id = document.id.clone();
+ state.graph.add_document(document);
+
+ let (doc_index, document) = state
+ .graph
+ .document(&id)
+ .expect("document was just added to the state");
+ let root = match &document.root {
+ Some(root) => root,
+ None => continue,
+ };
+
+ match wdl_ast::Document::cast(SyntaxNode::new_root(root.clone()))
+ .expect("root should cast")
+ .ast()
+ {
+ Ast::Unsupported => {}
+ Ast::V1(ast) => {
+ for import in ast.imports() {
+ let text = match import.uri().text() {
+ Some(text) => text,
+ None => continue,
+ };
+
+ let import_id = match DocumentId::relative_to(&id, text.as_str()) {
+ Ok(id) => Arc::new(id),
+ Err(_) => continue,
+ };
+
+ match state.graph.document(&import_id) {
+ Some((dep_index, _)) => {
+ // The dependency is already in the graph, so add a dependency
+ // edge; however, we must detect a cycle before doing so
+ if has_path_connecting(
+ &state.graph.inner,
+ doc_index,
+ dep_index,
+ Some(&mut state.space),
+ ) {
+ // Adding the edge would cause a cycle, so record the cycle
+ // instead
+ log::info!(
+ "an import cycle was detected between `{id}` and \
+ `{import_id}`"
+ );
+ state.cycles.insert((doc_index, dep_index));
+ } else {
+ // The edge won't cause a cycle, so add it
+ log::info!(
+ "updating dependency edge from `{id}` to `{import_id}`"
+ );
+ state.graph.inner.update_edge(dep_index, doc_index, ());
+ }
+ }
+ None => {
+ // The dependency isn't in the graph; add a new node and
+ // dependency edge
+ log::info!(
+ "updating dependency edge from `{id}` to `{import_id}` \
+ (added to parse queue)"
+ );
+ let dep_index = state
+ .graph
+ .add_document(Document::new(import_id.clone(), false));
+ state.graph.inner.update_edge(dep_index, doc_index, ());
+ parse_set.insert(import_id);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ (state, parse_set)
+ })
+ .await
+ }
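
// NOTE: two standard graph techniques appear around this point. First, the
// cycle check in `add_import_dependencies` above: in a DAG, adding the edge
// `dep -> doc` closes a cycle exactly when `dep` is already reachable from
// `doc`. Second, `spawn_analysis_tasks` below schedules analysis as
// Kahn-style layers: repeatedly take every node with no incoming edges,
// process that layer in parallel, then remove it. Both are sketched here
// assuming petgraph 0.6; `would_cycle` and `topological_layers` are
// illustrative names, not part of this patch.

use petgraph::algo::has_path_connecting;
use petgraph::graph::NodeIndex;
use petgraph::stable_graph::StableDiGraph;
use petgraph::Direction;

/// Returns whether adding the edge `from -> to` would close a cycle.
fn would_cycle<N, E>(graph: &StableDiGraph<N, E>, from: NodeIndex, to: NodeIndex) -> bool {
    // The new edge closes a cycle iff `from` is already reachable from `to`.
    has_path_connecting(graph, to, from, None)
}

/// Splits an acyclic `graph` into topological layers; nodes within a layer
/// are mutually independent and may be analyzed in parallel.
fn topological_layers<N: Clone, E>(mut graph: StableDiGraph<N, E>) -> Vec<Vec<N>> {
    let mut layers = Vec::new();
    while graph.node_count() > 0 {
        // Every node with no incoming edges belongs to the next layer.
        let layer: Vec<NodeIndex> = graph
            .node_indices()
            .filter(|&n| graph.edges_directed(n, Direction::Incoming).next().is_none())
            .collect();
        layers.push(layer.iter().map(|&n| graph[n].clone()).collect());
        // Removing the layer also removes its outgoing edges, which
        // unblocks the next layer; `StableDiGraph` keeps indices valid.
        for n in layer {
            graph.remove_node(n);
        }
    }
    layers
}
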
+
+ /// Spawns analysis tasks.
+ ///
+ /// Analysis tasks are spawned in topological order.
+ async fn spawn_analysis_tasks(state: State, progress: &Option<Arc<dyn Fn(ProgressKind, usize, usize) + Send + Sync>>) -> State {
+ // As we're going to be analyzing on multiple threads, wrap the state with a
+ // `RwLock`.
+ let mut state = Arc::new(RwLock::new(state));
+ let mut remaining: Option<StableDiGraph<Arc<DocumentId>, ()>> = None;
+ let mut set = Vec::new();
+ while remaining
+ .as_ref()
+ .map(|g| g.node_count() > 0)
+ .unwrap_or(true)
+ {
+ (state, remaining, set) = RayonHandle::spawn(move || {
+ // Insert a copy of the graph where we just map the nodes to the document
+ // identifiers; we need a copy as we are going to be removing nodes from the
+ // graph as we process them in topological order
+ let g = remaining.get_or_insert_with(|| {
+ state.read().graph.inner.map(|_, n| n.id.clone(), |_, _| ())
+ });
+
+ // Build a set of nodes with no incoming edges
+ set.clear();
+ for node in g.node_indices() {
+ if g.edges_directed(node, Direction::Incoming).next().is_none() {
+ set.push(node);
+ }
+ }
+
+ // Remove the nodes we're about to analyze from the "remaining" graph
+ // This also removes the outgoing edges from those nodes
+ for index in &set {
+ g.remove_node(*index);
+ }
+
+ (state, remaining, set)
+ })
+ .await;
+
+ let tasks = set
+ .iter()
+ .map(|index| {
+ let index = *index;
+ let state = state.clone();
+ RayonHandle::spawn(move || Self::analyze_node(state, index))
+ })
+ .collect::<FuturesUnordered<_>>();
+
+ Self::await_with_progress::<_, _, Vec<_>>(ProgressKind::Analyzing, tasks, progress)
+ .await;
+ }
+
+ // We're finished with the tasks; there should be no outstanding references to
+ // the state
+ Arc::into_inner(state)
+ .expect("only one reference should remain")
+ .into_inner()
+ }
+
+ /// Analyzes a node in the document graph.
+ ///
+ /// This completes the analysis state of the node.
+ fn analyze_node(state: Arc<RwLock<State>>, index: NodeIndex) {
+ let (id, root) = {
+ // scope for read lock
+ let state = state.read();
+ let node = &state.graph.inner[index];
+ (node.id.clone(), node.root.clone())
+ };
+
+ log::info!("analyzing `{id}`");
+ let start = Instant::now();
+ let (scope, diagnostics) = if let Some(root) = root {
+ let document =
+ wdl_ast::Document::cast(SyntaxNode::new_root(root)).expect("root should cast");
+ let state = state.read();
+ DocumentScope::new(&state, &id, &document)
+ } else {
+ (Default::default(), Default::default())
+ };
+
+ {
+ // Scope for write lock
+ // Write the result of the analysis to the document
+ let mut state = state.write();
+ let doc = &mut state.graph.inner[index];
+ let state = doc.state.in_progress();
+
+ state.scope = scope;
+ if !diagnostics.is_empty() {
+ state.diagnostics.extend(diagnostics);
+ }
+
+ // Complete the analysis of the document
+ doc.complete();
+ }
+
+ log::info!(
+ "analysis of `{id}` completed in {elapsed:?}",
+ elapsed = start.elapsed()
+ )
+ }
+}
+
+/// Constant that asserts `AnalysisEngine` is `Send + Sync`; if not, it fails to
+/// compile.
+const _: () = {
+ /// Helper that will fail to compile if T is not `Send + Sync`.
+ const fn _assert() {} + _assert::(); +}; diff --git a/wdl-analysis/src/eval/v1.rs b/wdl-analysis/src/eval/v1.rs index 2c6fdaca7..8ad7ddd2f 100644 --- a/wdl-analysis/src/eval/v1.rs +++ b/wdl-analysis/src/eval/v1.rs @@ -168,7 +168,7 @@ impl TaskGraph { graph.requirements = Some(graph.inner.add_node(TaskGraphNode::Requirements(section))); } - TaskItem::Hints(section) + TaskItem::TaskHints(section) if version >= SupportedVersion::V1(V1::Two) && graph.hints.is_none() && graph.runtime.is_none() => diff --git a/wdl-analysis/src/graph.rs b/wdl-analysis/src/graph.rs index 8257e8a20..142a89162 100644 --- a/wdl-analysis/src/graph.rs +++ b/wdl-analysis/src/graph.rs @@ -27,7 +27,6 @@ use tracing::debug; use tracing::info; use url::Url; use uuid::Uuid; -use wdl_ast::AstNode; use wdl_ast::Diagnostic; use wdl_ast::SyntaxNode; use wdl_ast::Validator; diff --git a/wdl-analysis/src/stdlib.rs b/wdl-analysis/src/stdlib.rs index 129d764c4..4508bcbd9 100644 --- a/wdl-analysis/src/stdlib.rs +++ b/wdl-analysis/src/stdlib.rs @@ -7,8 +7,8 @@ use std::sync::LazyLock; use indexmap::IndexMap; use indexmap::IndexSet; -use wdl_ast::SupportedVersion; use wdl_ast::version::V1; +use wdl_ast::SupportedVersion; use crate::types::ArrayType; use crate::types::Coercible; @@ -1441,62 +1441,57 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { let mut functions = IndexMap::new(); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#floor - assert!( - functions - .insert( - "floor", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), - ) - .into(), + assert!(functions + .insert( + "floor", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#ceil - assert!( - functions - .insert( - "ceil", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), - ) - .into(), + assert!(functions + .insert( + "ceil", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#round - assert!( - functions - .insert( - "round", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), - ) - .into(), + assert!(functions + .insert( + "round", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#min - assert!( - functions - .insert( - "min", - PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ + assert!(functions + .insert( + "min", + PolymorphicFunction::new( + SupportedVersion::V1(V1::One), + vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::Integer) .parameter(PrimitiveTypeKind::Integer) @@ -1517,18 +1512,19 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { 
.parameter(PrimitiveTypeKind::Float) .ret(PrimitiveTypeKind::Float) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#max - assert!( - functions - .insert( - "max", - PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ + assert!(functions + .insert( + "max", + PolymorphicFunction::new( + SupportedVersion::V1(V1::One), + vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::Integer) .parameter(PrimitiveTypeKind::Integer) @@ -1549,73 +1545,68 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::Float) .ret(PrimitiveTypeKind::Float) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-find - assert!( - functions - .insert( - "find", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveType::optional(PrimitiveTypeKind::String)) - .build(), - ) - .into(), + assert!(functions + .insert( + "find", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .ret(PrimitiveType::optional(PrimitiveTypeKind::String)) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-matches - assert!( - functions - .insert( - "matches", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveTypeKind::Boolean) - .build(), - ) - .into(), + assert!(functions + .insert( + "matches", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .ret(PrimitiveTypeKind::Boolean) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#sub - assert!( - functions - .insert( - "sub", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveTypeKind::String) - .build(), - ) - .into(), + assert!(functions + .insert( + "sub", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .ret(PrimitiveTypeKind::String) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#basename - assert!( - functions - .insert( - "basename", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "basename", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .required(1) .parameter(PrimitiveTypeKind::File) @@ -1638,18 +1629,19 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::String) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-join_paths - assert!( - functions - .insert( - "join_paths", - 
PolymorphicFunction::new(SupportedVersion::V1(V1::Two), vec![ + assert!(functions + .insert( + "join_paths", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Two), + vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::File) .parameter(PrimitiveTypeKind::String) @@ -1664,35 +1656,34 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string_non_empty) .ret(PrimitiveTypeKind::File) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#glob - assert!( - functions - .insert( - "glob", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .ret(array_file) - .build(), - ) - .into(), + assert!(functions + .insert( + "glob", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .ret(array_file) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#size - assert!( - functions - .insert( - "size", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "size", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .required(1) .parameter(PrimitiveType::optional(PrimitiveTypeKind::File)) @@ -1722,152 +1713,137 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::Float) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#stdout - assert!( - functions - .insert( - "stdout", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .ret(PrimitiveTypeKind::File) - .build(), - ) - .into(), + assert!(functions + .insert( + "stdout", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .ret(PrimitiveTypeKind::File) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#stderr - assert!( - functions - .insert( - "stderr", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .ret(PrimitiveTypeKind::File) - .build(), - ) - .into(), + assert!(functions + .insert( + "stderr", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .ret(PrimitiveTypeKind::File) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_string - assert!( - functions - .insert( - "read_string", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::String) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_string", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::String) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_int - assert!( - functions - .insert( - "read_int", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Integer) - .build(), - ) - .into(), + assert!(functions + .insert( + 
"read_int", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::Integer) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_float - assert!( - functions - .insert( - "read_float", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Float) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_float", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::Float) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_boolean - assert!( - functions - .insert( - "read_boolean", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Boolean) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_boolean", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::Boolean) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_lines - assert!( - functions - .insert( - "read_lines", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(array_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_lines", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(array_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_lines - assert!( - functions - .insert( - "write_lines", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(array_string) - .ret(PrimitiveTypeKind::File) - .build(), - ) - .into(), + assert!(functions + .insert( + "write_lines", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(array_string) + .ret(PrimitiveTypeKind::File) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_tsv - assert!( - functions - .insert( - "read_tsv", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "read_tsv", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::File) .ret(array_array_string) @@ -1883,18 +1859,19 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string) .ret(array_object) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_tsv - assert!( - functions - .insert( - "write_tsv", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "write_tsv", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .parameter(array_array_string) .ret(PrimitiveTypeKind::File) @@ -1917,121 +1894,110 @@ pub static STDLIB: LazyLock = 
LazyLock::new(|| { .parameter(array_string) .ret(PrimitiveTypeKind::File) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_map - assert!( - functions - .insert( - "read_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(map_string_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(map_string_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_map - assert!( - functions - .insert( - "write_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(map_string_string) - .ret(PrimitiveTypeKind::File) - .build(), - ) - .into(), + assert!(functions + .insert( + "write_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(map_string_string) + .ret(PrimitiveTypeKind::File) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_json - assert!( - functions - .insert( - "read_json", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(Type::Union) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_json", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(Type::Union) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_json - assert!( - functions - .insert( - "write_json", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", JsonSerializableConstraint) - .parameter(GenericType::Parameter("X")) - .ret(PrimitiveTypeKind::File) - .build(), - ) - .into(), + assert!(functions + .insert( + "write_json", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", JsonSerializableConstraint) + .parameter(GenericType::Parameter("X")) + .ret(PrimitiveTypeKind::File) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_object - assert!( - functions - .insert( - "read_object", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(Type::Object) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_object", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(Type::Object) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_objects - assert!( - functions - .insert( - "read_objects", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(array_object) - .build(), - ) - .into(), + assert!(functions + .insert( + "read_objects", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + 
.parameter(PrimitiveTypeKind::File) + .ret(array_object) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_object - assert!( - functions - .insert( - "write_object", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "write_object", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .parameter(Type::Object) .ret(PrimitiveTypeKind::File) @@ -2041,18 +2007,19 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericType::Parameter("S")) .ret(PrimitiveTypeKind::File) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_objects - assert!( - functions - .insert( - "write_objects", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "write_objects", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .parameter(array_object) .ret(PrimitiveTypeKind::File) @@ -2062,284 +2029,261 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericArrayType::new(GenericType::Parameter("S"))) .ret(PrimitiveTypeKind::File) .build(), - ],) - .into(), + ], ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#prefix - assert!( - functions - .insert( - "prefix", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "prefix", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#suffix - assert!( - functions - .insert( - "suffix", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "suffix", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#quote - assert!( - functions - .insert( - "quote", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "quote", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + 
.parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#squote - assert!( - functions - .insert( - "squote", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), - ) - .into(), + assert!(functions + .insert( + "squote", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#sep - assert!( - functions - .insert( - "sep", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(PrimitiveTypeKind::String) - .build(), - ) - .into(), + assert!(functions + .insert( + "sep", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(PrimitiveTypeKind::String) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#range - assert!( - functions - .insert( - "range", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Integer) - .ret(array_int) - .build(), - ) - .into(), + assert!(functions + .insert( + "range", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Integer) + .ret(array_int) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#transpose - assert!( - functions - .insert( - "transpose", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .ret(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .build(), - ) - .into(), + assert!(functions + .insert( + "transpose", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .parameter(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .ret(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#cross - assert!( - functions - .insert( - "cross", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - 
))) - .build(), - ) - .into(), + assert!(functions + .insert( + "cross", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) + .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#zip - assert!( - functions - .insert( - "zip", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - ))) - .build(), - ) - .into(), + assert!(functions + .insert( + "zip", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) + .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#unzip - assert!( - functions - .insert( - "unzip", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - ))) - .ret(GenericPairType::new( - GenericArrayType::new(GenericType::Parameter("X")), - GenericArrayType::new(GenericType::Parameter("Y")), - )) - .build(), - ) - .into(), + assert!(functions + .insert( + "unzip", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .ret(GenericPairType::new( + GenericArrayType::new(GenericType::Parameter("X")), + GenericArrayType::new(GenericType::Parameter("Y")), + )) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-contains - assert!( - functions - .insert( - "contains", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .type_parameter("P", AnyPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .parameter(GenericType::Parameter("P")) - .ret(PrimitiveTypeKind::Boolean) - .build(), - ) - .into(), + assert!(functions + .insert( + "contains", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .type_parameter("P", AnyPrimitiveTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .parameter(GenericType::Parameter("P")) + .ret(PrimitiveTypeKind::Boolean) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-chunk - assert!( - functions - .insert( - "chunk", - 
MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(PrimitiveTypeKind::Integer) - .ret(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .build(), - ) - .into(), + assert!(functions + .insert( + "chunk", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .any_type_parameter("X") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(PrimitiveTypeKind::Integer) + .ret(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#flatten - assert!( - functions - .insert( - "flatten", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X") - ))) - .ret(GenericArrayType::new(GenericType::Parameter("X"))) - .build(), - ) - .into(), + assert!(functions + .insert( + "flatten", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .parameter(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X") + ))) + .ret(GenericArrayType::new(GenericType::Parameter("X"))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#select_first - assert!( - functions - .insert( - "select_first", - // This differs from the definition of `select_first` in that we can have a single - // signature of `X select_first(Array[X?], [X])`. 
- MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), + assert!(functions + .insert( + "select_first", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ + FunctionSignature::builder() + .type_parameter("X", OptionalTypeConstraint) + .parameter(GenericArrayType::non_empty(GenericType::Parameter("X"))) + .ret(GenericType::UnqualifiedParameter("X")) + .build(), FunctionSignature::builder() .type_parameter("X", OptionalTypeConstraint) .required(1) @@ -2347,88 +2291,83 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericType::UnqualifiedParameter("X")) .ret(GenericType::UnqualifiedParameter("X")) .build(), - ) - .into(), + ] ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#select_all - assert!( - functions - .insert( - "select_all", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", OptionalTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .ret(GenericArrayType::new(GenericType::UnqualifiedParameter( - "X" - ))) - .build(), - ) - .into(), + assert!(functions + .insert( + "select_all", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", OptionalTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .ret(GenericArrayType::new(GenericType::UnqualifiedParameter( + "X" + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#as_pairs - assert!( - functions - .insert( - "as_pairs", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .build(), - ) - .into(), + assert!(functions + .insert( + "as_pairs", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#as_map - assert!( - functions - .insert( - "as_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .ret(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .build(), - ) - .into(), + assert!(functions + .insert( + "as_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .ret(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .build(), ) - 
.is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#keys - assert!( - functions - .insert( - "keys", - PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ + assert!(functions + .insert( + "keys", + PolymorphicFunction::new( + SupportedVersion::V1(V1::One), + vec![ FunctionSignature::builder() .type_parameter("K", RequiredPrimitiveTypeConstraint) .any_type_parameter("V") @@ -2447,18 +2386,19 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(Type::Object) .ret(array_string) .build(), - ]) - .into(), + ] ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#contains_key - assert!( - functions - .insert( - "contains_key", - PolymorphicFunction::new(SupportedVersion::V1(V1::Two), vec![ + assert!(functions + .insert( + "contains_key", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Two), + vec![ FunctionSignature::builder() .type_parameter("K", RequiredPrimitiveTypeConstraint) .any_type_parameter("V") @@ -2494,83 +2434,78 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string) .ret(PrimitiveTypeKind::Boolean) .build(), - ]) - .into(), + ] ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-values - assert!( - functions - .insert( - "values", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .ret(GenericArrayType::new(GenericType::Parameter("V"))) - .build(), - ) - .into(), + assert!(functions + .insert( + "values", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .ret(GenericArrayType::new(GenericType::Parameter("V"))) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#collect_by_key - assert!( - functions - .insert( - "collect_by_key", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .ret(GenericMapType::new( - GenericType::Parameter("K"), - GenericArrayType::new(GenericType::Parameter("V")) - )) - .build(), - ) - .into(), + assert!(functions + .insert( + "collect_by_key", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .ret(GenericMapType::new( + GenericType::Parameter("K"), + GenericArrayType::new(GenericType::Parameter("V")) + )) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#defined - assert!( - functions - .insert( - "defined", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", OptionalTypeConstraint) - .parameter(GenericType::Parameter("X")) - 
.ret(PrimitiveTypeKind::Boolean) - .build(), - ) - .into(), + assert!(functions + .insert( + "defined", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", OptionalTypeConstraint) + .parameter(GenericType::Parameter("X")) + .ret(PrimitiveTypeKind::Boolean) + .build(), ) - .is_none() - ); + .into(), + ) + .is_none()); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#length - assert!( - functions - .insert( - "length", - PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ + assert!(functions + .insert( + "length", + PolymorphicFunction::new( + SupportedVersion::V1(V1::Zero), + vec![ FunctionSignature::builder() .any_type_parameter("X") .parameter(GenericArrayType::new(GenericType::Parameter("X"))) @@ -2593,11 +2528,11 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::Integer) .build(), - ]) - .into(), + ] ) - .is_none() - ); + .into(), + ) + .is_none()); StandardLibrary { types, @@ -2737,10 +2672,13 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + ], + ) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(1)); @@ -2748,10 +2686,13 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::String.into()]) .expect_err("bind should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Float`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Float`".into() + } + ); // Check for Union (i.e. indeterminate) let ty = f @@ -2782,10 +2723,13 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + ], + ) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(1)); @@ -2793,10 +2737,13 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::String.into()]) .expect_err("bind should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Map[K, V]` where `K`: any required primitive type".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Map[K, V]` where `K`: any required primitive type".into() + } + ); // Check for Union (i.e. 
indeterminate) let ty = f @@ -2823,10 +2770,13 @@ mod test { PrimitiveTypeKind::Boolean, )); let e = f.bind(&mut types, &[ty]).expect_err("bind should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Map[K, Boolean]` where `K`: any required primitive type".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Map[K, Boolean]` where `K`: any required primitive type".into() + } + ); } #[test] @@ -2841,10 +2791,13 @@ mod test { let e = f .bind(&mut types, &[array_string]) .expect_err("bind should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Array[X]` where `X`: any optional type".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Array[X]` where `X`: any optional type".into() + } + ); // Check for a Array[String?] -> Array[String] let array_optional_string = types.add_array(ArrayType::new(PrimitiveType::optional( @@ -2883,97 +2836,136 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(2)); let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - PrimitiveTypeKind::File.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + PrimitiveTypeKind::File.into(), + ], + ) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(2)); // Check for `(Int, Int)` let ty = f - .bind(&mut types, &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::Integer.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Integer.into(), + PrimitiveTypeKind::Integer.into(), + ], + ) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Int"); // Check for `(Int, Float)` let ty = f - .bind(&mut types, &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::Float.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Integer.into(), + PrimitiveTypeKind::Float.into(), + ], + ) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(Float, Int)` let ty = f - .bind(&mut types, &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::Integer.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::Integer.into(), + ], + ) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(Float, Float)` let ty = f - .bind(&mut types, &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::Float.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::Float.into(), + ], + ) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(String, Int)` let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Integer.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Integer.into(), + ], + ) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Int` or `Float`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Int` or `Float`".into() + } + ); // Check for `(Int, String)` let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::String.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Integer.into(), + 
PrimitiveTypeKind::String.into(), + ], + ) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`Int` or `Float`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`Int` or `Float`".into() + } + ); // Check for `(String, Float)` let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Float.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Float.into(), + ], + ) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Int` or `Float`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Int` or `Float`".into() + } + ); // Check for `(Float, String)` let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::String.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::String.into(), + ], + ) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`Int` or `Float`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`Int` or `Float`".into() + } + ); } #[test] @@ -2988,11 +2980,14 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind(&mut types, &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - PrimitiveTypeKind::File.into(), - ]) + .bind( + &mut types, + &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + PrimitiveTypeKind::File.into(), + ], + ) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(2)); @@ -3000,10 +2995,13 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Array[X]` where `X`: any optional type".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Array[X]` where `X`: any optional type".into() + } + ); // Check `Array[String?]+` let array = types.add_array(ArrayType::non_empty(PrimitiveType::optional( @@ -3024,10 +3022,13 @@ mod test { let e = f .bind(&mut types, &[array, PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`String`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`String`".into() + } + ); // Check `Array[String?]` let array = types.add_array(ArrayType::new(PrimitiveType::optional( @@ -3048,9 +3049,12 @@ mod test { let e = f .bind(&mut types, &[array, PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`String`".into() - }); + assert_eq!( + e, + FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`String`".into() + } + ); } } diff --git a/wdl-analysis/src/types/v1.rs b/wdl-analysis/src/types/v1.rs index aeb2a0349..d1ec248a4 100644 --- a/wdl-analysis/src/types/v1.rs +++ b/wdl-analysis/src/types/v1.rs @@ -504,16 +504,20 @@ where // Check for a sep option is specified; if so, accept `Array[P]` where `P` is // primitive. 
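
// NOTE: the hunk below generalizes this check from the single
// `placeholder.option()` accessor to iterating `placeholder.options()`, so
// a `sep` option is honored no matter where it appears among multiple
// placeholder options. The shape of that scan in isolation, using
// simplified stand-ins for the real `wdl_ast` types:

/// Simplified stand-in for `wdl_ast::v1::PlaceholderOption`.
#[allow(dead_code)]
enum PlaceholderOption {
    Sep(String),
    Default(String),
    TrueFalse(String, String),
}

/// Returns whether any `sep` option is present, stopping at the first hit.
fn has_sep_option<I: IntoIterator<Item = PlaceholderOption>>(options: I) -> bool {
    for option in options {
        if let PlaceholderOption::Sep(_) = option {
            return true;
        }
    }
    false
}
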
let mut coercible = false; - if let Some(PlaceholderOption::Sep(_)) = placeholder.option() { - if let Type::Compound(c) = ty { - if let CompoundTypeDef::Array(a) = - self.types.type_definition(c.definition()) - { - if !a.element_type().is_optional() - && a.element_type().as_primitive().is_some() + + for option in placeholder.options() { + if let PlaceholderOption::Sep(_) = option { + if let Type::Compound(c) = ty { + if let CompoundTypeDef::Array(a) = + self.types.type_definition(c.definition()) { - // OK - coercible = true; + if !a.element_type().is_optional() + && a.element_type().as_primitive().is_some() + { + // OK + coercible = true; + break; + } } } } diff --git a/wdl-analysis/tests/analysis.rs b/wdl-analysis/tests/analysis.rs index 3c62ca2e5..26e05947d 100644 --- a/wdl-analysis/tests/analysis.rs +++ b/wdl-analysis/tests/analysis.rs @@ -37,6 +37,7 @@ use wdl_analysis::path_to_uri; use wdl_ast::Diagnostic; use wdl_ast::SyntaxNode; +/// Finds tests to run as part of the analysis test suite. fn find_tests() -> Vec { // Check for filter arguments consisting of test names let mut filter = HashSet::new(); @@ -64,6 +65,7 @@ fn find_tests() -> Vec { tests } +/// Normalizes a result. fn normalize(s: &str, is_error: bool) -> String { if is_error { // Normalize paths in any error messages @@ -74,6 +76,7 @@ fn normalize(s: &str, is_error: bool) -> String { s.replace("\r\n", "\n") } +/// Comparse a single result. fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<()> { let result = normalize(result, is_error); if env::var_os("BLESS").is_some() { @@ -100,6 +103,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<()> { Ok(()) } +/// Compares the provided results. fn compare_results(test: &Path, results: Vec) -> Result<()> { let mut buffer = Buffer::no_color(); let cwd = std::env::current_dir().expect("must have a CWD"); @@ -149,6 +153,7 @@ fn compare_results(test: &Path, results: Vec) -> Result<()> { async fn main() { // These are the tests that require single document analysis as they are // sensitive to parse order + /// The tests that require single document analysis. const SINGLE_DOCUMENT_TESTS: &[&str] = &["import-dependency-cycle"]; let tests = find_tests(); @@ -176,6 +181,9 @@ async fn main() { // Discover the results that are relevant only to this test let base = clean(absolute(test).expect("should be made absolute")); + // NOTE: clippy appears to be incorrect that this can be modified to use + // `filter_map`. Perhaps this should be revisited in the future. + #[allow(clippy::filter_map_bool_then)] let results = results .iter() .filter_map(|r| { diff --git a/wdl-ast/Cargo.toml b/wdl-ast/Cargo.toml index 37c3b9b88..fe6698db6 100644 --- a/wdl-ast/Cargo.toml +++ b/wdl-ast/Cargo.toml @@ -12,6 +12,8 @@ documentation = "https://docs.rs/wdl-ast" [dependencies] wdl-grammar = { path = "../wdl-grammar", version = "0.8.0" } +macropol = "0.1.3" +paste = "1.0.15" rowan = { workspace = true } url = { workspace = true } urlencoding = { workspace = true } @@ -26,6 +28,9 @@ codespan-reporting = { workspace = true } [features] codespan = ["wdl-grammar/codespan"] +[lints] +workspace = true + [[test]] name = "validation" required-features = ["codespan"] diff --git a/wdl-ast/src/element.rs b/wdl-ast/src/element.rs new file mode 100644 index 000000000..369b126be --- /dev/null +++ b/wdl-ast/src/element.rs @@ -0,0 +1,755 @@ +//! Elements (nodes or tokens) within the AST. 
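
// NOTE: for each mapping line it is given, the `ast_element_impl!` macro
// defined just below generates a `SyntaxKind`-driven `cast` plus
// `as_*`/`into_*`/`unwrap_*` accessors on element enums such as `Node`.
// Hand-expanded for a single variant, with simplified stand-in types
// rather than the literal macro expansion, the generated shape is roughly:

/// Stand-ins so the sketch is self-contained.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Eq)]
enum SyntaxKind {
    BoundDeclNode,
    LiteralArrayNode,
}

struct BoundDecl;
struct LiteralArray;

enum Node {
    BoundDecl(BoundDecl),
    LiteralArray(LiteralArray),
}

impl Node {
    /// Returns whether a particular syntax kind can cast to a `Node`.
    fn can_cast(kind: &SyntaxKind) -> bool {
        matches!(kind, SyntaxKind::BoundDeclNode | SyntaxKind::LiteralArrayNode)
    }

    /// Attempts to get a reference to the inner `BoundDecl`.
    fn as_bound_decl(&self) -> Option<&BoundDecl> {
        match self {
            Node::BoundDecl(inner) => Some(inner),
            _ => None,
        }
    }

    /// Consumes `self` and returns the inner `BoundDecl`, panicking on any
    /// other variant.
    fn unwrap_bound_decl(self) -> BoundDecl {
        match self {
            Node::BoundDecl(inner) => inner,
            _ => panic!("expected `BoundDecl` but got a different variant"),
        }
    }
}
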
+ +use rowan::NodeOrToken; + +use crate::AstNode; +use crate::AstToken; +use crate::Comment; +use crate::Ident; +use crate::SyntaxElement; +use crate::SyntaxKind; +use crate::SyntaxNode; +use crate::SyntaxToken; +use crate::Version; +use crate::VersionStatement; +use crate::Whitespace; +use crate::v1::*; + +#[macropol::macropol] +macro_rules! ast_element_impl { + ( + // The name of the impl to create (e.g., `Node`). + $name:ident, + // The improper name of the impl to be displayed (e.g., `node`). + $display:ident, + // The prefix of the syntax element (e.g., `SyntaxNode`). + $syntax_prefix:ty, + // A mapping of all of the elements to map from syntax elements to ast + // elements. + // + // E.g., `command_section(): CommandSectionNode => CommandSection => CommandSection`. + [$($suffix:ident(): $syntax_kind:ty => $inner:ty => $variant:ty),*] + ) => { + paste::paste! { + impl $name { + #[doc = "Attempts to cast a [`SyntaxElement`] to a [`" $name "`]."] + pub fn cast(element: SyntaxElement) -> Option { + match element.kind() { + $( + SyntaxKind::$syntax_kind => { + let $display = element + .[]() + .expect( + "`SyntaxElement` with kind \ + `SyntaxKind::${stringify!($syntax_kind)}` could not \ + be turned into a `${stringify!($syntax_prefix)}`" + ); + + let inner = $inner::cast($display) + .expect( + "couldn't cast ${stringify!($display)} to \ + `${stringify!($inner)}` + "); + + Some($name::$variant(inner)) + }, + )* + _ => None + } + } + + #[doc = "Returns whether or not a particular [`SyntaxKind`] can cast to a [`" $name "`]."] + pub fn can_cast(kind: &SyntaxKind) -> bool { + match kind { + $( + SyntaxKind::$syntax_kind => true, + )* + _ => false + } + } + + + #[doc = "Gets the inner [`" $syntax_prefix "`] from the [`" $name "`]."] + pub fn syntax(&self) -> &$syntax_prefix { + match self { + $( + $name::$variant(inner) => inner.syntax(), + )* + // NOTE: a wildcard pattern (`_`) should not be required + // here. If one is suggested by the compiler, that means + // you're probably missing a pattern in the macros + // below. + } + } + + $( + /// Attempts to get a reference to the inner [`${stringify!($inner)}`]. + /// + /// * If `self` is a [`${stringify!($variant)}`], then a reference to the + /// inner [`${stringify!($inner)}`] wrapped in [`Some`] is returned. + /// * Else, [`None`] is returned. + pub fn [](&self) -> Option<&$inner> { + match self { + $name::$variant($suffix) => Some($suffix), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`${stringify!($inner)}`]. + /// + /// * If `self` is a [`${stringify!($variant)}`], then the inner + /// [`${stringify!($inner)}`] wrapped in [`Some`] is returned. + /// * Else, [`None`] is returned. + pub fn [](self) -> Option<$inner> { + match self { + $name::$variant($suffix) => Some($suffix), + _ => None, + } + } + + /// Consumes `self` and returns the inner [`${stringify!($inner)}`]. + /// + /// # Panics + /// + /// If `self` is not a [`${stringify!($variant)}`]. + pub fn [](self) -> $inner { + self.[]().expect( + "expected `${stringify!($variant)}` but got a different variant" + ) + } + )* + } + } + }; +} + +/// An abstract syntax tree node. +/// +/// This enum has a variant for each struct implementing the [`AstNode`] trait. +#[derive(Clone, Debug)] +pub enum Node { + /// An access expression. + AccessExpr(AccessExpr), + /// An addition expression. + AdditionExpr(AdditionExpr), + /// An array type. + ArrayType(ArrayType), + /// A V1 abstract syntax tree. + Ast(Ast), + /// A bound declaration. 
+ BoundDecl(BoundDecl), + /// An after clause in a call statement. + CallAfter(CallAfter), + /// An alias clause in a call statement. + CallAlias(CallAlias), + /// A call expression. + CallExpr(CallExpr), + /// A call input item. + CallInputItem(CallInputItem), + /// A call statement. + CallStatement(CallStatement), + /// A target within a call statement. + CallTarget(CallTarget), + /// A command section. + CommandSection(CommandSection), + /// A conditional statement. + ConditionalStatement(ConditionalStatement), + /// The `default` placeholder option. + DefaultOption(DefaultOption), + /// A division expression. + DivisionExpr(DivisionExpr), + /// An equality expression. + EqualityExpr(EqualityExpr), + /// An exponentiation expression. + ExponentiationExpr(ExponentiationExpr), + /// A greater than or equal to expression. + GreaterEqualExpr(GreaterEqualExpr), + /// A greater than expression. + GreaterExpr(GreaterExpr), + /// An if expression. + IfExpr(IfExpr), + /// An import alias. + ImportAlias(ImportAlias), + /// An import statement. + ImportStatement(ImportStatement), + /// An index expression. + IndexExpr(IndexExpr), + /// An inequality expression. + InequalityExpr(InequalityExpr), + /// An input section. + InputSection(InputSection), + /// A less than or equal to expression. + LessEqualExpr(LessEqualExpr), + /// A less than expression. + LessExpr(LessExpr), + /// A literal array. + LiteralArray(LiteralArray), + /// A literal boolean. + LiteralBoolean(LiteralBoolean), + /// A literal float. + LiteralFloat(LiteralFloat), + /// A literal hints. + LiteralHints(LiteralHints), + /// A literal hints item. + LiteralHintsItem(LiteralHintsItem), + /// A literal input. + LiteralInput(LiteralInput), + /// A literal input item. + LiteralInputItem(LiteralInputItem), + /// A literal integer. + LiteralInteger(LiteralInteger), + /// A literal map. + LiteralMap(LiteralMap), + /// A literal map item. + LiteralMapItem(LiteralMapItem), + /// A literal none. + LiteralNone(LiteralNone), + /// A literal null. + LiteralNull(LiteralNull), + /// A literal object. + LiteralObject(LiteralObject), + /// A literal object item. + LiteralObjectItem(LiteralObjectItem), + /// A literal output. + LiteralOutput(LiteralOutput), + /// A literal output item. + LiteralOutputItem(LiteralOutputItem), + /// A literal pair. + LiteralPair(LiteralPair), + /// A literal string. + LiteralString(LiteralString), + /// A literal struct. + LiteralStruct(LiteralStruct), + /// A literal struct item. + LiteralStructItem(LiteralStructItem), + /// A logical and expression. + LogicalAndExpr(LogicalAndExpr), + /// A logical not expression. + LogicalNotExpr(LogicalNotExpr), + /// A logical or expression. + LogicalOrExpr(LogicalOrExpr), + /// A map type. + MapType(MapType), + /// A metadata array. + MetadataArray(MetadataArray), + /// A metadata object. + MetadataObject(MetadataObject), + /// A metadata object item. + MetadataObjectItem(MetadataObjectItem), + /// A metadata section. + MetadataSection(MetadataSection), + /// A modulo expression. + ModuloExpr(ModuloExpr), + /// A multiplication expression. + MultiplicationExpr(MultiplicationExpr), + /// A reference to a name. + NameRef(NameRef), + /// A negation expression. + NegationExpr(NegationExpr), + /// An output section. + OutputSection(OutputSection), + /// A pair type. + PairType(PairType), + /// An object type. + ObjectType(ObjectType), + /// A parameter metadata section. + ParameterMetadataSection(ParameterMetadataSection), + /// A parenthesized expression. 
+    ParenthesizedExpr(ParenthesizedExpr),
+    /// A placeholder.
+    Placeholder(Placeholder),
+    /// A primitive type.
+    PrimitiveType(PrimitiveType),
+    /// A requirements item.
+    RequirementsItem(RequirementsItem),
+    /// A requirements section.
+    RequirementsSection(RequirementsSection),
+    /// A runtime item.
+    RuntimeItem(RuntimeItem),
+    /// A runtime section.
+    RuntimeSection(RuntimeSection),
+    /// A scatter statement.
+    ScatterStatement(ScatterStatement),
+    /// The `sep` placeholder option.
+    SepOption(SepOption),
+    /// A struct definition.
+    StructDefinition(StructDefinition),
+    /// A subtraction expression.
+    SubtractionExpr(SubtractionExpr),
+    /// A task definition.
+    TaskDefinition(TaskDefinition),
+    /// A task item within a hints section.
+    TaskHintsItem(TaskHintsItem),
+    /// A hints section within a task.
+    TaskHintsSection(TaskHintsSection),
+    /// A `true`/`false` placeholder option.
+    TrueFalseOption(TrueFalseOption),
+    /// A reference to a type.
+    TypeRef(TypeRef),
+    /// An unbound declaration.
+    UnboundDecl(UnboundDecl),
+    /// A version statement.
+    VersionStatement(VersionStatement),
+    /// A workflow definition.
+    WorkflowDefinition(WorkflowDefinition),
+    /// A workflow item within a hints section.
+    WorkflowHintsItem(WorkflowHintsItem),
+    /// A hints section within a workflow.
+    WorkflowHintsSection(WorkflowHintsSection),
+}
+
+ast_element_impl!(
+    Node,
+    node,
+    SyntaxNode,
+    [
+        access_expr(): AccessExprNode => AccessExpr => AccessExpr,
+        addition_expr(): AdditionExprNode => AdditionExpr => AdditionExpr,
+        array_type(): ArrayTypeNode => ArrayType => ArrayType,
+        ast(): RootNode => Ast => Ast,
+        bound_decl(): BoundDeclNode => BoundDecl => BoundDecl,
+        call_after(): CallAfterNode => CallAfter => CallAfter,
+        call_alias(): CallAliasNode => CallAlias => CallAlias,
+        call_expr(): CallExprNode => CallExpr => CallExpr,
+        call_input_item(): CallInputItemNode => CallInputItem => CallInputItem,
+        call_statement(): CallStatementNode => CallStatement => CallStatement,
+        call_target(): CallTargetNode => CallTarget => CallTarget,
+        command_section(): CommandSectionNode => CommandSection => CommandSection,
+        conditional_statement(): ConditionalStatementNode => ConditionalStatement => ConditionalStatement,
+        default_option(): PlaceholderDefaultOptionNode => DefaultOption => DefaultOption,
+        division_expr(): DivisionExprNode => DivisionExpr => DivisionExpr,
+        equality_expr(): EqualityExprNode => EqualityExpr => EqualityExpr,
+        exponentiation_expr(): ExponentiationExprNode => ExponentiationExpr => ExponentiationExpr,
+        greater_equal_expr(): GreaterEqualExprNode => GreaterEqualExpr => GreaterEqualExpr,
+        greater_expr(): GreaterExprNode => GreaterExpr => GreaterExpr,
+        if_expr(): IfExprNode => IfExpr => IfExpr,
+        import_alias(): ImportAliasNode => ImportAlias => ImportAlias,
+        import_statement(): ImportStatementNode => ImportStatement => ImportStatement,
+        index_expr(): IndexExprNode => IndexExpr => IndexExpr,
+        inequality_expr(): InequalityExprNode => InequalityExpr => InequalityExpr,
+        input_section(): InputSectionNode => InputSection => InputSection,
+        less_equal_expr(): LessEqualExprNode => LessEqualExpr => LessEqualExpr,
+        less_expr(): LessExprNode => LessExpr => LessExpr,
+        literal_array(): LiteralArrayNode => LiteralArray => LiteralArray,
+        literal_boolean(): LiteralBooleanNode => LiteralBoolean => LiteralBoolean,
+        literal_float(): LiteralFloatNode => LiteralFloat => LiteralFloat,
+        literal_hints(): LiteralHintsNode => LiteralHints => LiteralHints,
+        literal_hints_item(): LiteralHintsItemNode => LiteralHintsItem => LiteralHintsItem,
+        literal_input(): LiteralInputNode => LiteralInput => LiteralInput,
+        literal_input_item(): LiteralInputItemNode => LiteralInputItem => LiteralInputItem,
+        literal_integer(): LiteralIntegerNode => LiteralInteger => LiteralInteger,
+        literal_map(): LiteralMapNode => LiteralMap => LiteralMap,
+        literal_map_item(): LiteralMapItemNode => LiteralMapItem => LiteralMapItem,
+        literal_none(): LiteralNoneNode => LiteralNone => LiteralNone,
+        literal_null(): LiteralNullNode => LiteralNull => LiteralNull,
+        literal_object(): LiteralObjectNode => LiteralObject => LiteralObject,
+        literal_object_item(): LiteralObjectItemNode => LiteralObjectItem => LiteralObjectItem,
+        literal_output(): LiteralOutputNode => LiteralOutput => LiteralOutput,
+        literal_output_item(): LiteralOutputItemNode => LiteralOutputItem => LiteralOutputItem,
+        literal_pair(): LiteralPairNode => LiteralPair => LiteralPair,
+        literal_string(): LiteralStringNode => LiteralString => LiteralString,
+        literal_struct(): LiteralStructNode => LiteralStruct => LiteralStruct,
+        literal_struct_item(): LiteralStructItemNode => LiteralStructItem => LiteralStructItem,
+        logical_and_expr(): LogicalAndExprNode => LogicalAndExpr => LogicalAndExpr,
+        logical_not_expr(): LogicalNotExprNode => LogicalNotExpr => LogicalNotExpr,
+        logical_or_expr(): LogicalOrExprNode => LogicalOrExpr => LogicalOrExpr,
+        map_type(): MapTypeNode => MapType => MapType,
+        metadata_array(): MetadataArrayNode => MetadataArray => MetadataArray,
+        metadata_object(): MetadataObjectNode => MetadataObject => MetadataObject,
+        metadata_object_item(): MetadataObjectItemNode => MetadataObjectItem => MetadataObjectItem,
+        metadata_section(): MetadataSectionNode => MetadataSection => MetadataSection,
+        modulo_expr(): ModuloExprNode => ModuloExpr => ModuloExpr,
+        multiplication_expr(): MultiplicationExprNode => MultiplicationExpr => MultiplicationExpr,
+        name_ref(): NameRefNode => NameRef => NameRef,
+        negation_expr(): NegationExprNode => NegationExpr => NegationExpr,
+        object_type(): ObjectTypeNode => ObjectType => ObjectType,
+        output_section(): OutputSectionNode => OutputSection => OutputSection,
+        pair_type(): PairTypeNode => PairType => PairType,
+        parameter_metadata_section(): ParameterMetadataSectionNode => ParameterMetadataSection => ParameterMetadataSection,
+        parenthesized_expr(): ParenthesizedExprNode => ParenthesizedExpr => ParenthesizedExpr,
+        placeholder(): PlaceholderNode => Placeholder => Placeholder,
+        primitive_type(): PrimitiveTypeNode => PrimitiveType => PrimitiveType,
+        requirements_item(): RequirementsItemNode => RequirementsItem => RequirementsItem,
+        requirements_section(): RequirementsSectionNode => RequirementsSection => RequirementsSection,
+        runtime_item(): RuntimeItemNode => RuntimeItem => RuntimeItem,
+        runtime_section(): RuntimeSectionNode => RuntimeSection => RuntimeSection,
+        scatter_statement(): ScatterStatementNode => ScatterStatement => ScatterStatement,
+        sep_option(): PlaceholderSepOptionNode => SepOption => SepOption,
+        struct_definition(): StructDefinitionNode => StructDefinition => StructDefinition,
+        subtraction_expr(): SubtractionExprNode => SubtractionExpr => SubtractionExpr,
+        task_definition(): TaskDefinitionNode => TaskDefinition => TaskDefinition,
+        task_hints_item(): TaskHintsItemNode => TaskHintsItem => TaskHintsItem,
+        task_hints_section(): TaskHintsSectionNode => TaskHintsSection => TaskHintsSection,
+        true_false_option(): PlaceholderTrueFalseOptionNode => TrueFalseOption => TrueFalseOption,
+        type_ref(): TypeRefNode => TypeRef => TypeRef,
+        unbound_decl(): UnboundDeclNode => UnboundDecl => UnboundDecl,
+        version_statement(): VersionStatementNode => VersionStatement => VersionStatement,
+        workflow_definition(): WorkflowDefinitionNode => WorkflowDefinition => WorkflowDefinition,
+        workflow_hints_item(): WorkflowHintsItemNode => WorkflowHintsItem => WorkflowHintsItem,
+        workflow_hints_section(): WorkflowHintsSectionNode => WorkflowHintsSection => WorkflowHintsSection
+    ]
+);
+
+/// An abstract syntax tree token.
+///
+/// This enum has a variant for each struct implementing the [`AstToken`] trait.
+#[derive(Clone, Debug)]
+pub enum Token {
+    /// The `after` keyword.
+    AfterKeyword(AfterKeyword),
+    /// The `alias` keyword.
+    AliasKeyword(AliasKeyword),
+    /// The `Array` type keyword.
+    ArrayTypeKeyword(ArrayTypeKeyword),
+    /// The `as` keyword.
+    AsKeyword(AsKeyword),
+    /// The `=` symbol.
+    Assignment(Assignment),
+    /// The `*` symbol.
+    Asterisk(Asterisk),
+    /// The `Boolean` type keyword.
+    BooleanTypeKeyword(BooleanTypeKeyword),
+    /// The `call` keyword.
+    CallKeyword(CallKeyword),
+    /// The `}` symbol.
+    CloseBrace(CloseBrace),
+    /// The `]` symbol.
+    CloseBracket(CloseBracket),
+    /// The `>>>` symbol.
+    CloseHeredoc(CloseHeredoc),
+    /// The `)` symbol.
+    CloseParen(CloseParen),
+    /// The `:` symbol.
+    Colon(Colon),
+    /// The `,` symbol.
+    Comma(Comma),
+    /// The `command` keyword.
+    CommandKeyword(CommandKeyword),
+    /// The text within a command section.
+    CommandText(CommandText),
+    /// A comment.
+    Comment(Comment),
+    /// The `Directory` type keyword.
+    DirectoryTypeKeyword(DirectoryTypeKeyword),
+    /// The `.` symbol.
+    Dot(Dot),
+    /// The `"` symbol.
+    DoubleQuote(DoubleQuote),
+    /// The `else` keyword.
+    ElseKeyword(ElseKeyword),
+    /// The `==` symbol.
+    Equal(Equal),
+    /// The `!` symbol.
+    Exclamation(Exclamation),
+    /// The `**` symbol.
+    Exponentiation(Exponentiation),
+    /// The `false` keyword.
+    FalseKeyword(FalseKeyword),
+    /// The `File` type keyword.
+    FileTypeKeyword(FileTypeKeyword),
+    /// A float.
+    Float(Float),
+    /// The `Float` type keyword.
+    FloatTypeKeyword(FloatTypeKeyword),
+    /// The `>` symbol.
+    Greater(Greater),
+    /// The `>=` symbol.
+    GreaterEqual(GreaterEqual),
+    /// The `hints` keyword.
+    HintsKeyword(HintsKeyword),
+    /// An identifier.
+    Ident(Ident),
+    /// The `if` keyword.
+    IfKeyword(IfKeyword),
+    /// The `import` keyword.
+    ImportKeyword(ImportKeyword),
+    /// The `in` keyword.
+    InKeyword(InKeyword),
+    /// The `input` keyword.
+    InputKeyword(InputKeyword),
+    /// An integer.
+    Integer(Integer),
+    /// The `Int` type keyword.
+    IntTypeKeyword(IntTypeKeyword),
+    /// The `<` symbol.
+    Less(Less),
+    /// The `<=` symbol.
+    LessEqual(LessEqual),
+    /// The `&&` symbol.
+    LogicalAnd(LogicalAnd),
+    /// The `||` symbol.
+    LogicalOr(LogicalOr),
+    /// The `Map` type keyword.
+    MapTypeKeyword(MapTypeKeyword),
+    /// The `meta` keyword.
+    MetaKeyword(MetaKeyword),
+    /// The `-` symbol.
+    Minus(Minus),
+    /// The `None` keyword.
+    NoneKeyword(NoneKeyword),
+    /// The `!=` symbol.
+    NotEqual(NotEqual),
+    /// The `null` keyword.
+    NullKeyword(NullKeyword),
+    /// The `object` keyword.
+    ObjectKeyword(ObjectKeyword),
+    /// The `Object` type keyword.
+    ObjectTypeKeyword(ObjectTypeKeyword),
+    /// The `{` symbol.
+    OpenBrace(OpenBrace),
+    /// The `[` symbol.
+    OpenBracket(OpenBracket),
+    /// The `<<<` symbol.
+    OpenHeredoc(OpenHeredoc),
+    /// The `(` symbol.
+    OpenParen(OpenParen),
+    /// The `output` keyword.
+    OutputKeyword(OutputKeyword),
+    /// The `Pair` type keyword.
+    PairTypeKeyword(PairTypeKeyword),
+    /// The `parameter_meta` keyword.
+    ParameterMetaKeyword(ParameterMetaKeyword),
+    /// The `%` symbol.
+    Percent(Percent),
+    /// One of the placeholder open symbols.
+    PlaceholderOpen(PlaceholderOpen),
+    /// The `+` symbol.
+    Plus(Plus),
+    /// The `?` symbol.
+    QuestionMark(QuestionMark),
+    /// The `requirements` keyword.
+    RequirementsKeyword(RequirementsKeyword),
+    /// The `runtime` keyword.
+    RuntimeKeyword(RuntimeKeyword),
+    /// The `scatter` keyword.
+    ScatterKeyword(ScatterKeyword),
+    /// The `'` symbol.
+    SingleQuote(SingleQuote),
+    /// The `/` symbol.
+    Slash(Slash),
+    /// The textual part of a string.
+    StringText(StringText),
+    /// The `String` type keyword.
+    StringTypeKeyword(StringTypeKeyword),
+    /// The `struct` keyword.
+    StructKeyword(StructKeyword),
+    /// The `task` keyword.
+    TaskKeyword(TaskKeyword),
+    /// The `then` keyword.
+    ThenKeyword(ThenKeyword),
+    /// The `true` keyword.
+    TrueKeyword(TrueKeyword),
+    /// A version.
+    Version(Version),
+    /// The `version` keyword.
+    VersionKeyword(VersionKeyword),
+    /// Whitespace.
+    Whitespace(Whitespace),
+    /// The `workflow` keyword.
+    WorkflowKeyword(WorkflowKeyword),
+}
+
+ast_element_impl!(
+    Token,
+    token,
+    SyntaxToken,
+    [
+        after_keyword(): AfterKeyword => AfterKeyword => AfterKeyword,
+        alias_keyword(): AliasKeyword => AliasKeyword => AliasKeyword,
+        array_type_keyword(): ArrayTypeKeyword => ArrayTypeKeyword => ArrayTypeKeyword,
+        as_keyword(): AsKeyword => AsKeyword => AsKeyword,
+        assignment(): Assignment => Assignment => Assignment,
+        asterisk(): Asterisk => Asterisk => Asterisk,
+        boolean_type_keyword(): BooleanTypeKeyword => BooleanTypeKeyword => BooleanTypeKeyword,
+        call_keyword(): CallKeyword => CallKeyword => CallKeyword,
+        close_brace(): CloseBrace => CloseBrace => CloseBrace,
+        close_bracket(): CloseBracket => CloseBracket => CloseBracket,
+        close_heredoc(): CloseHeredoc => CloseHeredoc => CloseHeredoc,
+        close_paren(): CloseParen => CloseParen => CloseParen,
+        colon(): Colon => Colon => Colon,
+        comma(): Comma => Comma => Comma,
+        command_keyword(): CommandKeyword => CommandKeyword => CommandKeyword,
+        command_text(): LiteralCommandText => CommandText => CommandText,
+        comment(): Comment => Comment => Comment,
+        directory_type_keyword(): DirectoryTypeKeyword => DirectoryTypeKeyword => DirectoryTypeKeyword,
+        dot(): Dot => Dot => Dot,
+        double_quote(): DoubleQuote => DoubleQuote => DoubleQuote,
+        else_keyword(): ElseKeyword => ElseKeyword => ElseKeyword,
+        equal(): Equal => Equal => Equal,
+        exclamation(): Exclamation => Exclamation => Exclamation,
+        exponentiation(): Exponentiation => Exponentiation => Exponentiation,
+        false_keyword(): FalseKeyword => FalseKeyword => FalseKeyword,
+        file_type_keyword(): FileTypeKeyword => FileTypeKeyword => FileTypeKeyword,
+        float(): Float => Float => Float,
+        float_type_keyword(): FloatTypeKeyword => FloatTypeKeyword => FloatTypeKeyword,
+        greater(): Greater => Greater => Greater,
+        greater_equal(): GreaterEqual => GreaterEqual => GreaterEqual,
+        hints_keyword(): HintsKeyword => HintsKeyword => HintsKeyword,
+        ident(): Ident => Ident => Ident,
+        if_keyword(): IfKeyword => IfKeyword => IfKeyword,
+        import_keyword(): ImportKeyword => ImportKeyword => ImportKeyword,
+        in_keyword(): InKeyword => InKeyword => InKeyword,
+        input_keyword(): InputKeyword => InputKeyword => InputKeyword,
+        integer(): Integer => Integer => Integer,
+        int_type_keyword(): IntTypeKeyword => IntTypeKeyword => IntTypeKeyword,
+        less(): Less => Less => Less,
+        less_equal(): LessEqual => LessEqual => LessEqual,
+        logical_and(): LogicalAnd => LogicalAnd => LogicalAnd,
+        logical_or(): LogicalOr => LogicalOr => LogicalOr,
+        map_type_keyword(): MapTypeKeyword => MapTypeKeyword => MapTypeKeyword,
+        meta_keyword(): MetaKeyword => MetaKeyword => MetaKeyword,
+        minus(): Minus => Minus => Minus,
+        none_keyword(): NoneKeyword => NoneKeyword => NoneKeyword,
+        not_equal(): NotEqual => NotEqual => NotEqual,
+        null_keyword(): NullKeyword => NullKeyword => NullKeyword,
+        object_keyword(): ObjectKeyword => ObjectKeyword => ObjectKeyword,
+        object_type_keyword(): ObjectTypeKeyword => ObjectTypeKeyword => ObjectTypeKeyword,
+        open_brace(): OpenBrace => OpenBrace => OpenBrace,
+        open_bracket(): OpenBracket => OpenBracket => OpenBracket,
+        open_heredoc(): OpenHeredoc => OpenHeredoc => OpenHeredoc,
+        open_paren(): OpenParen => OpenParen => OpenParen,
+        output_keyword(): OutputKeyword => OutputKeyword => OutputKeyword,
+        pair_type_keyword(): PairTypeKeyword => PairTypeKeyword => PairTypeKeyword,
+        parameter_meta_keyword(): ParameterMetaKeyword => ParameterMetaKeyword => ParameterMetaKeyword,
+        percent(): Percent => Percent => Percent,
+        placeholder_open(): PlaceholderOpen => PlaceholderOpen => PlaceholderOpen,
+        plus(): Plus => Plus => Plus,
+        question_mark(): QuestionMark => QuestionMark => QuestionMark,
+        requirements_keyword(): RequirementsKeyword => RequirementsKeyword => RequirementsKeyword,
+        runtime_keyword(): RuntimeKeyword => RuntimeKeyword => RuntimeKeyword,
+        scatter_keyword(): ScatterKeyword => ScatterKeyword => ScatterKeyword,
+        single_quote(): SingleQuote => SingleQuote => SingleQuote,
+        slash(): Slash => Slash => Slash,
+        string_text(): LiteralStringText => StringText => StringText,
+        string_type_keyword(): StringTypeKeyword => StringTypeKeyword => StringTypeKeyword,
+        struct_keyword(): StructKeyword => StructKeyword => StructKeyword,
+        task_keyword(): TaskKeyword => TaskKeyword => TaskKeyword,
+        then_keyword(): ThenKeyword => ThenKeyword => ThenKeyword,
+        true_keyword(): TrueKeyword => TrueKeyword => TrueKeyword,
+        version_keyword(): VersionKeyword => VersionKeyword => VersionKeyword,
+        version(): Version => Version => Version,
+        whitespace(): Whitespace => Whitespace => Whitespace,
+        workflow_keyword(): WorkflowKeyword => WorkflowKeyword => WorkflowKeyword
+    ]
+);
+
+/// An abstract syntax tree element.
+#[derive(Clone, Debug)]
+pub enum Element {
+    /// An abstract syntax tree node.
+    Node(Node),
+
+    /// An abstract syntax tree token.
+    Token(Token),
+}
+
+impl Element {
+    /// Attempts to get a reference to the inner [`Node`].
+    ///
+    /// * If `self` is an [`Element::Node`], then a reference to the inner
+    ///   [`Node`] wrapped in [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn as_node(&self) -> Option<&Node> {
+        match self {
+            Self::Node(node) => Some(node),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`Node`].
+    ///
+    /// * If `self` is an [`Element::Node`], then the inner [`Node`] wrapped in
+    ///   [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn into_node(self) -> Option<Node> {
+        match self {
+            Self::Node(node) => Some(node),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and returns the inner [`Node`].
+    ///
+    /// # Panics
+    ///
+    /// If `self` is not an [`Element::Node`].
+    pub fn unwrap_node(self) -> Node {
+        self.into_node()
+            .expect("expected `Element::Node` but got a different variant")
+    }
+
+    /// Attempts to get a reference to the inner [`Token`].
+    ///
+    /// * If `self` is an [`Element::Token`], then a reference to the inner
+    ///   [`Token`] wrapped in [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn as_token(&self) -> Option<&Token> {
+        match self {
+            Self::Token(token) => Some(token),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`Token`].
+    ///
+    /// * If `self` is an [`Element::Token`], then the inner [`Token`] wrapped
+    ///   in [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn into_token(self) -> Option<Token> {
+        match self {
+            Self::Token(token) => Some(token),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and returns the inner [`Token`].
+    ///
+    /// # Panics
+    ///
+    /// If `self` is not an [`Element::Token`].
+    pub fn unwrap_token(self) -> Token {
+        self.into_token()
+            .expect("expected `Element::Token` but got a different variant")
+    }
+
+    /// Gets the underlying [`SyntaxElement`] from the [`Element`].
+    pub fn syntax(&self) -> SyntaxElement {
+        match self {
+            Element::Node(node) => SyntaxElement::Node(node.syntax().clone()),
+            Element::Token(token) => SyntaxElement::Token(token.syntax().clone()),
+        }
+    }
+
+    /// Gets the underlying [`SyntaxKind`] from the [`Element`].
+    pub fn kind(&self) -> SyntaxKind {
+        match self {
+            Element::Node(node) => node.syntax().kind(),
+            Element::Token(token) => token.syntax().kind(),
+        }
+    }
+
+    /// Returns whether the [`SyntaxElement`] represents trivia.
+    pub fn is_trivia(&self) -> bool {
+        match self {
+            Element::Node(node) => node.syntax().kind().is_trivia(),
+            Element::Token(token) => token.syntax().kind().is_trivia(),
+        }
+    }
+
+    /// Casts a [`SyntaxElement`] to an [`Element`].
+    ///
+    /// This is expected to always succeed, as any [`SyntaxElement`] _should_
+    /// have a corresponding [`Element`] (and, if it doesn't, that's very
+    /// likely a bug).
+    pub fn cast(element: SyntaxElement) -> Self {
+        match &element {
+            NodeOrToken::Node(_) => {
+                Self::Node(Node::cast(element).expect("a syntax node should cast to a Node"))
+            }
+            NodeOrToken::Token(_) => {
+                Self::Token(Token::cast(element).expect("a syntax token should cast to a Token"))
+            }
+        }
+    }
+}
diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs
index 8ffc54fc0..cd92148af 100644
--- a/wdl-ast/src/lib.rs
+++ b/wdl-ast/src/lib.rs
@@ -42,6 +42,7 @@ pub use rowan::Direction;
 pub use rowan::ast::AstChildren;
 pub use rowan::ast::AstNode;
 pub use rowan::ast::support;
+use v1::VersionKeyword;
 pub use wdl_grammar::Diagnostic;
 pub use wdl_grammar::Label;
 pub use wdl_grammar::Severity;
@@ -58,9 +59,13 @@ pub use wdl_grammar::version;
 
 pub mod v1;
 
+mod element;
+#[cfg(test)]
+mod registry;
 mod validation;
 mod visitor;
 
+pub use element::*;
 pub use validation::*;
 pub use visitor::*;
 
@@ -125,6 +130,36 @@ pub trait AstToken {
     }
 }
 
+/// Finds the first child that casts to a particular [`AstToken`].
+pub fn token_child<T: AstToken>(parent: &SyntaxNode) -> Option<T> {
+    parent
+        .children_with_tokens()
+        .filter_map(|c| c.into_token())
+        .find_map(T::cast)
+}
+
+/// Finds all children that cast to a particular [`AstToken`].
+pub fn token_children<T: AstToken>(parent: &SyntaxNode) -> impl Iterator<Item = T> {
+    parent
+        .children_with_tokens()
+        .filter_map(|c| c.into_token().and_then(T::cast))
+}
+
+/// An extension trait for [`AstNode`].
+pub trait AstNodeChildrenExt {
+    /// Gets the children of this [`AstNode`].
+    fn children(&self) -> impl Iterator<Item = Element>;
+}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage>> AstNodeChildrenExt for T {
+    fn children(&self) -> impl Iterator<Item = Element> {
+        self.syntax()
+            .clone()
+            .children_with_tokens()
+            .map(Element::cast)
+    }
+}
+
 /// Represents the AST of a [Document].
 ///
 /// See [Document::ast].
@@ -173,6 +208,27 @@ impl Ast {
 pub struct Document(SyntaxNode);
 
 impl Document {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Document`].
+    pub fn can_cast(kind: SyntaxKind) -> bool {
+        kind == SyntaxKind::RootNode
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Document`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            Some(Self(syntax))
+        } else {
+            None
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        &self.0
+    }
+
     /// Parses a document from the given source.
     ///
     /// A document and its AST elements are trivially cloned.
@@ -234,26 +290,6 @@ impl Document {
     }
 }
 
-impl AstNode for Document {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool {
-        kind == SyntaxKind::RootNode
-    }
-
-    fn cast(syntax: SyntaxNode) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            Some(Self(syntax))
-        } else {
-            None
-        }
-    }
-
-    fn syntax(&self) -> &SyntaxNode {
-        &self.0
-    }
-}
-
 impl fmt::Debug for Document {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.0.fmt(f)
@@ -261,7 +297,7 @@ impl fmt::Debug for Document {
 }
 
 /// Represents a whitespace token in the AST.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Whitespace(SyntaxToken);
 
 impl AstToken for Whitespace {
@@ -323,6 +359,11 @@ impl VersionStatement {
     pub fn version(&self) -> Version {
         token(&self.0).expect("version statement must have a version token")
     }
+
+    /// Gets the version keyword of the version statement.
+    pub fn keyword(&self) -> VersionKeyword {
+        token(&self.0).expect("version statement must have a version keyword")
+    }
 }
 
 impl AstNode for VersionStatement {
@@ -351,7 +392,7 @@ impl AstNode for VersionStatement {
 }
 
 /// Represents a version in the AST.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Version(SyntaxToken);
 
 impl AstToken for Version {
diff --git a/wdl-ast/src/registry.rs b/wdl-ast/src/registry.rs
new file mode 100644
index 000000000..5445361f1
--- /dev/null
+++ b/wdl-ast/src/registry.rs
@@ -0,0 +1,381 @@
+//! The AST node registry.
+//!
+//! The AST node registry was introduced only to ensure that all nodes in the
+//! concrete syntax tree have one and _only_ one analogous AST entity.
+//!
+//! The reason this is important to ensure statically is that this assumption
+//! of a one-to-one mapping between elements of the two kinds of tree is
+//! relied upon in downstream crates. For example, formatting works by
+//! traversing the CST of a WDL document and attempting to cast a node to any
+//! AST type that can then be recursively formatted.
+//!
+//! Furthermore, this is just a good invariant to uphold to ensure, in general,
+//! that the code remains straightforward to reason about (a CST element that
+//! can map to multiple AST elements in different contexts is inherently
+//! confusing).
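+//!
+//! As an illustrative sketch of the invariant (using the `inverse()` helper
+//! defined below), every non-symbolic [`SyntaxKind`] is expected to map to
+//! exactly one AST type:
+//!
+//! ```ignore
+//! // `inverse()` maps each `SyntaxKind` to the names of the AST types that
+//! // can cast from it; a task definition node must map to exactly one type.
+//! let inverse = inverse();
+//! assert_eq!(inverse[&SyntaxKind::TaskDefinitionNode].len(), 1);
+//! ```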
+
+use std::any::type_name;
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+use wdl_grammar::ALL_SYNTAX_KIND;
+use wdl_grammar::WorkflowDescriptionLanguage;
+
+use crate::AstNode;
+use crate::AstToken;
+use crate::Comment;
+use crate::Ident;
+use crate::SyntaxKind;
+use crate::Version;
+use crate::VersionStatement;
+use crate::Whitespace;
+use crate::v1;
+
+/// A private module for sealed traits.
+///
+/// The traits are sealed because we want to reserve the right to implement
+/// them in the future unhindered without introducing breaking changes.
+mod private {
+    /// The sealed trait for [`AstNodeRegistrant`](super::AstNodeRegistrant).
+    pub trait SealedNode {}
+
+    /// The sealed trait for [`AstTokenRegistrant`](super::AstTokenRegistrant).
+    pub trait SealedToken {}
+}
+
+/// A registry of all known mappings between AST elements (individual Rust types
+/// that implement the [`AstNode`] trait or [`AstToken`] trait) and the CST
+/// elements they can be cast from (via [`SyntaxKind`]\(s)).
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping with
+/// CST element kinds.
+static REGISTRY: LazyLock<HashMap<&'static str, Box<[SyntaxKind]>>> = LazyLock::new(|| {
+    let types = vec![
+        Comment::register(),
+        Ident::register(),
+        v1::AccessExpr::register(),
+        v1::AdditionExpr::register(),
+        v1::AfterKeyword::register(),
+        v1::AliasKeyword::register(),
+        v1::ArrayType::register(),
+        v1::ArrayTypeKeyword::register(),
+        v1::AsKeyword::register(),
+        v1::Assignment::register(),
+        v1::Ast::register(),
+        v1::Asterisk::register(),
+        v1::BooleanTypeKeyword::register(),
+        v1::BoundDecl::register(),
+        v1::CallAfter::register(),
+        v1::CallAlias::register(),
+        v1::CallExpr::register(),
+        v1::CallInputItem::register(),
+        v1::CallKeyword::register(),
+        v1::CallStatement::register(),
+        v1::CallTarget::register(),
+        v1::CloseBrace::register(),
+        v1::CloseBracket::register(),
+        v1::CloseHeredoc::register(),
+        v1::CloseParen::register(),
+        v1::Colon::register(),
+        v1::Comma::register(),
+        v1::CommandKeyword::register(),
+        v1::CommandSection::register(),
+        v1::CommandText::register(),
+        v1::ConditionalStatement::register(),
+        v1::DefaultOption::register(),
+        v1::DirectoryTypeKeyword::register(),
+        v1::DivisionExpr::register(),
+        v1::Dot::register(),
+        v1::DoubleQuote::register(),
+        v1::ElseKeyword::register(),
+        v1::Equal::register(),
+        v1::EqualityExpr::register(),
+        v1::Exclamation::register(),
+        v1::Exponentiation::register(),
+        v1::ExponentiationExpr::register(),
+        v1::FalseKeyword::register(),
+        v1::FileTypeKeyword::register(),
+        v1::Float::register(),
+        v1::FloatTypeKeyword::register(),
+        v1::Greater::register(),
+        v1::GreaterEqual::register(),
+        v1::GreaterEqualExpr::register(),
+        v1::GreaterExpr::register(),
+        v1::HintsKeyword::register(),
+        v1::IfExpr::register(),
+        v1::IfKeyword::register(),
+        v1::ImportAlias::register(),
+        v1::ImportKeyword::register(),
+        v1::ImportStatement::register(),
+        v1::IndexExpr::register(),
+        v1::InequalityExpr::register(),
+        v1::InKeyword::register(),
+        v1::InputKeyword::register(),
+        v1::InputSection::register(),
+        v1::Integer::register(),
+        v1::IntTypeKeyword::register(),
+        v1::Less::register(),
+        v1::LessEqual::register(),
+        v1::LessEqualExpr::register(),
+        v1::LessExpr::register(),
+        v1::LiteralArray::register(),
+        v1::LiteralBoolean::register(),
+        v1::LiteralFloat::register(),
+        v1::LiteralHints::register(),
+        v1::LiteralHintsItem::register(),
+        v1::LiteralInput::register(),
+        v1::LiteralInputItem::register(),
+        v1::LiteralInteger::register(),
v1::LiteralMap::register(), + v1::LiteralMapItem::register(), + v1::LiteralNone::register(), + v1::LiteralNull::register(), + v1::LiteralObject::register(), + v1::LiteralObjectItem::register(), + v1::LiteralOutput::register(), + v1::LiteralOutputItem::register(), + v1::LiteralPair::register(), + v1::LiteralString::register(), + v1::LiteralStruct::register(), + v1::LiteralStructItem::register(), + v1::LogicalAnd::register(), + v1::LogicalAndExpr::register(), + v1::LogicalNotExpr::register(), + v1::LogicalOr::register(), + v1::LogicalOrExpr::register(), + v1::MapType::register(), + v1::MapTypeKeyword::register(), + v1::MetadataArray::register(), + v1::MetadataObject::register(), + v1::MetadataObjectItem::register(), + v1::MetadataSection::register(), + v1::MetaKeyword::register(), + v1::Minus::register(), + v1::ModuloExpr::register(), + v1::MultiplicationExpr::register(), + v1::NameRef::register(), + v1::NegationExpr::register(), + v1::NoneKeyword::register(), + v1::NotEqual::register(), + v1::NullKeyword::register(), + v1::ObjectKeyword::register(), + v1::ObjectType::register(), + v1::ObjectTypeKeyword::register(), + v1::OpenBrace::register(), + v1::OpenBracket::register(), + v1::OpenHeredoc::register(), + v1::OpenParen::register(), + v1::OutputKeyword::register(), + v1::OutputSection::register(), + v1::PairType::register(), + v1::PairTypeKeyword::register(), + v1::ParameterMetadataSection::register(), + v1::ParameterMetaKeyword::register(), + v1::ParenthesizedExpr::register(), + v1::Percent::register(), + v1::Placeholder::register(), + v1::PlaceholderOpen::register(), + v1::Plus::register(), + v1::PrimitiveType::register(), + v1::QuestionMark::register(), + v1::RequirementsItem::register(), + v1::RequirementsKeyword::register(), + v1::RequirementsSection::register(), + v1::RuntimeItem::register(), + v1::RuntimeKeyword::register(), + v1::RuntimeSection::register(), + v1::ScatterKeyword::register(), + v1::ScatterStatement::register(), + v1::SepOption::register(), + v1::SingleQuote::register(), + v1::Slash::register(), + v1::StringText::register(), + v1::StringTypeKeyword::register(), + v1::StructDefinition::register(), + v1::StructKeyword::register(), + v1::SubtractionExpr::register(), + v1::TaskDefinition::register(), + v1::TaskHintsItem::register(), + v1::TaskHintsSection::register(), + v1::TaskKeyword::register(), + v1::ThenKeyword::register(), + v1::TrueFalseOption::register(), + v1::TrueKeyword::register(), + v1::TypeRef::register(), + v1::UnboundDecl::register(), + v1::Unknown::register(), + v1::VersionKeyword::register(), + v1::WorkflowDefinition::register(), + v1::WorkflowHintsItem::register(), + v1::WorkflowHintsSection::register(), + v1::WorkflowHintsArray::register(), + v1::WorkflowHintsObject::register(), + v1::WorkflowHintsObjectItem::register(), + v1::WorkflowKeyword::register(), + Version::register(), + VersionStatement::register(), + Whitespace::register(), + ]; + + let mut result = HashMap::new(); + + // NOTE: this is done this way instead of simply collecting into a + // [`HashMap`] to ensure on the fly that no keys are duplicated. + for (r#type, kinds) in types { + if result.contains_key(&r#type) { + panic!("the `{:?}` key is duplicated", r#type); + } + + result.insert(r#type, kinds); + } + + result +}); + +/// Computes the inverse of the registry. +/// +/// In other words, maps CST elements—dynamically typed as [`SyntaxKind`]s—to +/// the corresponding AST element(s) that can cast from them. 
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping with
+/// CST element kinds.
+fn inverse() -> HashMap<SyntaxKind, Box<[&'static str]>> {
+    let mut result = HashMap::<SyntaxKind, Vec<&'static str>>::new();
+
+    for (key, values) in REGISTRY.iter() {
+        for value in values.iter() {
+            result.entry(value.to_owned()).or_default().push(*key);
+        }
+    }
+
+    result
+        .into_iter()
+        .map(|(key, values)| (key, values.into_boxed_slice()))
+        .collect()
+}
+
+trait AstNodeRegistrant: private::SealedNode {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST node type.
+    fn register() -> (&'static str, Box<[SyntaxKind]>);
+}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> private::SealedNode for T {}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> AstNodeRegistrant for T {
+    fn register() -> (&'static str, Box<[SyntaxKind]>) {
+        (
+            type_name::<T>(),
+            ALL_SYNTAX_KIND
+                .iter()
+                .filter(|kind| T::can_cast(**kind))
+                .cloned()
+                .collect::<Vec<_>>()
+                .into_boxed_slice(),
+        )
+    }
+}
+
+trait AstTokenRegistrant: private::SealedToken {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST token
+    /// type.
+    fn register() -> (&'static str, Box<[SyntaxKind]>);
+}
+
+impl<T: AstToken + 'static> private::SealedToken for T {}
+
+impl<T: AstToken + 'static> AstTokenRegistrant for T {
+    fn register() -> (&'static str, Box<[SyntaxKind]>) {
+        (
+            type_name::<T>(),
+            ALL_SYNTAX_KIND
+                .iter()
+                .filter(|kind| T::can_cast(**kind))
+                .cloned()
+                .collect::<Vec<_>>()
+                .into_boxed_slice(),
+        )
+    }
+}
+
+mod tests {
+    use super::*;
+
+    /// This test ensures there is a one-to-one mapping between CST elements
+    /// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement
+    /// the [`AstNode`] trait or the [`AstToken`] trait).
+    ///
+    /// The importance of this is described at the top of the module.
+    #[test]
+    fn ensure_one_to_one() {
+        let mut missing = Vec::new();
+        let mut multiple = Vec::new();
+
+        let inverse_registry = inverse();
+
+        for kind in ALL_SYNTAX_KIND {
+            // NOTE: these are symbolic elements and should not be included in
+            // the analysis here.
+            if kind.is_symbolic() {
+                continue;
+            }
+
+            match inverse_registry.get(kind) {
+                // SAFETY: because this is an inverse registry, only
+                // [`SyntaxKind`]s with at least one registered implementing
+                // type would be registered here. Thus, by design of the
+                // `inverse()` method, this will never occur.
+                Some(values) if values.is_empty() => {
+                    unreachable!("the inverse registry should never contain an empty array")
+                }
+                Some(values) if values.len() > 1 => multiple.push((kind, values)),
+                None => missing.push(kind),
+                // NOTE: this arm is reached only when the values exist and the
+                // length is 1; in that case, there is a one-to-one mapping,
+                // which is exactly what we want.
+                _ => {}
+            }
+        }
+
+        if !missing.is_empty() {
+            let mut missing = missing
+                .into_iter()
+                .map(|kind| format!("{:?}", kind))
+                .collect::<Vec<_>>();
+            missing.sort();
+
+            panic!(
+                "detected `SyntaxKind`s without an associated `AstNode`/`AstToken` (n={}): {}",
+                missing.len(),
+                missing.join(", ")
+            )
+        }
+
+        if !multiple.is_empty() {
+            multiple.sort();
+            let mut multiple = multiple
+                .into_iter()
+                .map(|(kind, types)| {
+                    let mut types = types.clone();
+                    types.sort();
+
+                    let mut result = format!("== {:?} ==", kind);
+                    for r#type in types {
+                        result.push_str("\n* ");
+                        result.push_str(r#type);
+                    }
+
+                    result
+                })
+                .collect::<Vec<_>>();
+            multiple.sort();
+
+            panic!(
+                "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s \
+                (n={}):\n\n{}",
+                multiple.len(),
+                multiple.join("\n\n")
+            )
+        }
+    }
+}
diff --git a/wdl-ast/src/v1.rs b/wdl-ast/src/v1.rs
index d434f1db0..93dfcc5c6 100644
--- a/wdl-ast/src/v1.rs
+++ b/wdl-ast/src/v1.rs
@@ -12,6 +12,7 @@ mod expr;
 mod import;
 mod r#struct;
 mod task;
+mod tokens;
 mod workflow;
 
 pub use decls::*;
@@ -19,6 +20,7 @@ pub use expr::*;
 pub use import::*;
 pub use r#struct::*;
 pub use task::*;
+pub use tokens::*;
 pub use workflow::*;
 
 /// Represents a WDL V1 Abstract Syntax Tree (AST).
@@ -39,8 +41,8 @@ pub struct Ast(SyntaxNode);
 
 impl Ast {
     /// Gets all of the document items in the AST.
-    pub fn items(&self) -> AstChildren<DocumentItem> {
-        children(&self.0)
+    pub fn items(&self) -> impl Iterator<Item = DocumentItem> {
+        DocumentItem::children(&self.0)
     }
 
     /// Gets the import statements in the AST.
@@ -102,10 +104,10 @@ pub enum DocumentItem {
     Workflow(WorkflowDefinition),
 }
 
-impl AstNode for DocumentItem {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
+impl DocumentItem {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`DocumentItem`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
     where
         Self: Sized,
     {
@@ -118,25 +120,150 @@
         )
     }
 
-    fn cast(syntax: SyntaxNode) -> Option<Self>
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`DocumentItem`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self>
     where
         Self: Sized,
     {
         match syntax.kind() {
-            SyntaxKind::ImportStatementNode => Some(Self::Import(ImportStatement(syntax))),
-            SyntaxKind::StructDefinitionNode => Some(Self::Struct(StructDefinition(syntax))),
-            SyntaxKind::TaskDefinitionNode => Some(Self::Task(TaskDefinition(syntax))),
-            SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(WorkflowDefinition(syntax))),
+            SyntaxKind::ImportStatementNode => Some(Self::Import(
+                ImportStatement::cast(syntax).expect("import statement to cast"),
+            )),
+            SyntaxKind::StructDefinitionNode => Some(Self::Struct(
+                StructDefinition::cast(syntax).expect("struct definition to cast"),
+            )),
+            SyntaxKind::TaskDefinitionNode => Some(Self::Task(
+                TaskDefinition::cast(syntax).expect("task definition to cast"),
+            )),
+            SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(
+                WorkflowDefinition::cast(syntax).expect("workflow definition to cast"),
+            )),
             _ => None,
         }
     }
 
-    fn syntax(&self) -> &SyntaxNode {
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Import(element) => element.syntax(),
+            Self::Struct(element) => element.syntax(),
+            Self::Task(element) => element.syntax(),
+            Self::Workflow(element) => element.syntax(),
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`ImportStatement`].
+    ///
+    /// * If `self` is a [`DocumentItem::Import`], then a reference to the inner
+    ///   [`ImportStatement`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_import_statement(&self) -> Option<&ImportStatement> {
+        match self {
+            DocumentItem::Import(import) => Some(import),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ImportStatement`].
+    ///
+    /// * If `self` is a [`DocumentItem::Import`], then the inner
+    ///   [`ImportStatement`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_import_statement(self) -> Option<ImportStatement> {
+        match self {
+            DocumentItem::Import(import) => Some(import),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`StructDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Struct`], then a reference to the inner
+    ///   [`StructDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_struct_definition(&self) -> Option<&StructDefinition> {
+        match self {
+            DocumentItem::Struct(r#struct) => Some(r#struct),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`StructDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Struct`], then the inner
+    ///   [`StructDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_struct_definition(self) -> Option<StructDefinition> {
         match self {
-            Self::Import(i) => &i.0,
-            Self::Struct(s) => &s.0,
-            Self::Task(t) => &t.0,
-            Self::Workflow(w) => &w.0,
+            DocumentItem::Struct(r#struct) => Some(r#struct),
+            _ => None,
         }
     }
+
+    /// Attempts to get a reference to the inner [`TaskDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Task`], then a reference to the inner
+    ///   [`TaskDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_task_definition(&self) -> Option<&TaskDefinition> {
+        match self {
+            DocumentItem::Task(task) => Some(task),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`TaskDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Task`], then the inner
+    ///   [`TaskDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_task_definition(self) -> Option<TaskDefinition> {
+        match self {
+            DocumentItem::Task(task) => Some(task),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`WorkflowDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Workflow`], then a reference to the
+    ///   inner [`WorkflowDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_workflow_definition(&self) -> Option<&WorkflowDefinition> {
+        match self {
+            DocumentItem::Workflow(workflow) => Some(workflow),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`WorkflowDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Workflow`], then the inner
+    ///   [`WorkflowDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_workflow_definition(self) -> Option<WorkflowDefinition> {
+        match self {
+            DocumentItem::Workflow(workflow) => Some(workflow),
+            _ => None,
+        }
+    }
+
+    /// Finds the first child that can be cast to a [`DocumentItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`DocumentItem`] to
+    /// implement the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
+    }
+
+    /// Finds all children that can be cast to a [`DocumentItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`DocumentItem`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = DocumentItem> {
+        syntax.children().filter_map(Self::cast)
+    }
 }
diff --git a/wdl-ast/src/v1/decls.rs b/wdl-ast/src/v1/decls.rs
index 45e78f090..8053cdbb7 100644
--- a/wdl-ast/src/v1/decls.rs
+++ b/wdl-ast/src/v1/decls.rs
@@ -10,7 +10,6 @@ use crate::SyntaxKind;
 use crate::SyntaxNode;
 use crate::WorkflowDescriptionLanguage;
 use crate::support;
-use crate::support::child;
 use crate::token;
 
 /// Represents a `Map` type.
@@ -87,7 +86,7 @@ pub struct ArrayType(SyntaxNode);
 impl ArrayType {
     /// Gets the element type of the array.
     pub fn element_type(&self) -> Type {
-        child(&self.0).expect("array should have an element type")
+        Type::child(&self.0).expect("array should have an element type")
     }
 
     /// Determines if the type has the "non-empty" qualifier.
@@ -444,6 +443,61 @@ pub enum Type {
 }
 
 impl Type {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Type`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(
+            kind,
+            SyntaxKind::MapTypeNode
+                | SyntaxKind::ArrayTypeNode
+                | SyntaxKind::PairTypeNode
+                | SyntaxKind::ObjectTypeNode
+                | SyntaxKind::TypeRefNode
+                | SyntaxKind::PrimitiveTypeNode
+        )
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Type`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        match syntax.kind() {
+            SyntaxKind::MapTypeNode => {
+                Some(Self::Map(MapType::cast(syntax).expect("map type to cast")))
+            }
+            SyntaxKind::ArrayTypeNode => Some(Self::Array(
+                ArrayType::cast(syntax).expect("array type to cast"),
+            )),
+            SyntaxKind::PairTypeNode => Some(Self::Pair(
+                PairType::cast(syntax).expect("pair type to cast"),
+            )),
+            SyntaxKind::ObjectTypeNode => Some(Self::Object(
+                ObjectType::cast(syntax).expect("object type to cast"),
+            )),
+            SyntaxKind::TypeRefNode => {
+                Some(Self::Ref(TypeRef::cast(syntax).expect("type ref to cast")))
+            }
+            SyntaxKind::PrimitiveTypeNode => Some(Self::Primitive(
+                PrimitiveType::cast(syntax).expect("primitive type to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Type::Map(element) => element.syntax(),
+            Type::Array(element) => element.syntax(),
+            Type::Pair(element) => element.syntax(),
+            Type::Object(element) => element.syntax(),
+            Type::Ref(element) => element.syntax(),
+            Type::Primitive(element) => element.syntax(),
+        }
+    }
+
     /// Determines if the type is optional.
     pub fn is_optional(&self) -> bool {
         match self {
@@ -456,6 +510,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`MapType`].
+    ///
+    /// * If `self` is a [`Type::Map`], then a reference to the inner
+    ///   [`MapType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_map_type(&self) -> Option<&MapType> {
+        match self {
+            Self::Map(map) => Some(map),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`MapType`].
+    ///
+    /// * If `self` is a [`Type::Map`], then the inner [`MapType`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_map_type(self) -> Option<MapType> {
+        match self {
+            Self::Map(map) => Some(map),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a map type.
     ///
     /// # Panics
@@ -468,6 +546,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`ArrayType`].
+    ///
+    /// * If `self` is a [`Type::Array`], then a reference to the inner
+    ///   [`ArrayType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_array_type(&self) -> Option<&ArrayType> {
+        match self {
+            Self::Array(array) => Some(array),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ArrayType`].
+    ///
+    /// * If `self` is a [`Type::Array`], then the inner [`ArrayType`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_array_type(self) -> Option<ArrayType> {
+        match self {
+            Self::Array(array) => Some(array),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into an array type.
     ///
     /// # Panics
@@ -480,6 +582,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`PairType`].
+    ///
+    /// * If `self` is a [`Type::Pair`], then a reference to the inner
+    ///   [`PairType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_pair_type(&self) -> Option<&PairType> {
+        match self {
+            Self::Pair(pair) => Some(pair),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`PairType`].
+    ///
+    /// * If `self` is a [`Type::Pair`], then the inner [`PairType`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_pair_type(self) -> Option<PairType> {
+        match self {
+            Self::Pair(pair) => Some(pair),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a pair type.
     ///
     /// # Panics
@@ -492,6 +618,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`ObjectType`].
+    ///
+    /// * If `self` is a [`Type::Object`], then a reference to the inner
+    ///   [`ObjectType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_object_type(&self) -> Option<&ObjectType> {
+        match self {
+            Self::Object(object) => Some(object),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ObjectType`].
+    ///
+    /// * If `self` is a [`Type::Object`], then the inner [`ObjectType`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_object_type(self) -> Option<ObjectType> {
+        match self {
+            Self::Object(object) => Some(object),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into an object type.
     ///
     /// # Panics
@@ -504,6 +654,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`TypeRef`].
+    ///
+    /// * If `self` is a [`Type::Ref`], then a reference to the inner
+    ///   [`TypeRef`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_type_ref(&self) -> Option<&TypeRef> {
+        match self {
+            Self::Ref(type_ref) => Some(type_ref),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`TypeRef`].
+    ///
+    /// * If `self` is a [`Type::Ref`], then the inner [`TypeRef`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_type_ref(self) -> Option<TypeRef> {
+        match self {
+            Self::Ref(type_ref) => Some(type_ref),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a type reference.
     ///
     /// # Panics
@@ -516,6 +690,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`PrimitiveType`].
+    ///
+    /// * If `self` is a [`Type::Primitive`], then a reference to the inner
+    ///   [`PrimitiveType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
+        match self {
+            Self::Primitive(primitive) => Some(primitive),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`PrimitiveType`].
+    ///
+    /// * If `self` is a [`Type::Primitive`], then the inner [`PrimitiveType`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_primitive_type(self) -> Option<PrimitiveType> {
+        match self {
+            Self::Primitive(primitive) => Some(primitive),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a primitive type.
     ///
     /// # Panics
@@ -527,50 +725,23 @@ impl Type {
             _ => panic!("not a primitive type"),
         }
     }
-}
-
-impl AstNode for Type {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(
-            kind,
-            SyntaxKind::MapTypeNode
-                | SyntaxKind::ArrayTypeNode
-                | SyntaxKind::PairTypeNode
-                | SyntaxKind::ObjectTypeNode
-                | SyntaxKind::TypeRefNode
-                | SyntaxKind::PrimitiveTypeNode
-        )
-    }
 
-    fn cast(syntax: SyntaxNode) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        match syntax.kind() {
-            SyntaxKind::MapTypeNode => Some(Self::Map(MapType(syntax))),
-            SyntaxKind::ArrayTypeNode => Some(Self::Array(ArrayType(syntax))),
-            SyntaxKind::PairTypeNode => Some(Self::Pair(PairType(syntax))),
-            SyntaxKind::ObjectTypeNode => Some(Self::Object(ObjectType(syntax))),
-            SyntaxKind::TypeRefNode => Some(Self::Ref(TypeRef(syntax))),
-            SyntaxKind::PrimitiveTypeNode => Some(Self::Primitive(PrimitiveType(syntax))),
-            _ => None,
-        }
+    /// Finds the first child that can be cast to a [`Type`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`Type`] to implement
+    /// the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
     }
 
-    fn syntax(&self) -> &SyntaxNode {
-        match self {
-            Type::Map(m) => &m.0,
-            Type::Array(a) => &a.0,
-            Type::Pair(p) => &p.0,
-            Type::Object(o) => &o.0,
-            Type::Ref(r) => &r.0,
-            Type::Primitive(t) => &t.0,
-        }
+    /// Finds all children that can be cast to a [`Type`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`Type`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Type> {
+        syntax.children().filter_map(Self::cast)
     }
 }
 
@@ -594,7 +765,7 @@ pub struct UnboundDecl(pub(crate) SyntaxNode);
 impl UnboundDecl {
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
-        child(&self.0).expect("unbound declaration should have a type")
+        Type::child(&self.0).expect("unbound declaration should have a type")
     }
 
     /// Gets the name of the declaration.
@@ -635,7 +806,7 @@ pub struct BoundDecl(pub(crate) SyntaxNode);
 impl BoundDecl {
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
-        child(&self.0).expect("bound declaration should have a type")
+        Type::child(&self.0).expect("bound declaration should have a type")
    }
 
     /// Gets the name of the declaration.
@@ -645,7 +816,7 @@ impl BoundDecl {
 
     /// Gets the expression the declaration is bound to.
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("bound declaration should have an expression")
+        Expr::child(&self.0).expect("bound declaration should have an expression")
     }
 }
 
@@ -684,6 +855,40 @@ pub enum Decl {
 }
 
 impl Decl {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Decl`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        kind == SyntaxKind::BoundDeclNode || kind == SyntaxKind::UnboundDeclNode
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Decl`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        match syntax.kind() {
+            SyntaxKind::BoundDeclNode => Some(Self::Bound(
+                BoundDecl::cast(syntax).expect("bound decl to cast"),
+            )),
+            SyntaxKind::UnboundDeclNode => Some(Self::Unbound(
+                UnboundDecl::cast(syntax).expect("unbound decl to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Bound(element) => element.syntax(),
+            Self::Unbound(element) => element.syntax(),
+        }
+    }
+
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
         match self {
@@ -710,6 +915,30 @@ impl Decl {
         }
     }
 
+    /// Attempts to get a reference to the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Bound`], then a reference to the inner
+    ///   [`BoundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_bound_decl(&self) -> Option<&BoundDecl> {
+        match self {
+            Self::Bound(bound_decl) => Some(bound_decl),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Bound`], then the inner [`BoundDecl`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_bound_decl(self) -> Option<BoundDecl> {
+        match self {
+            Self::Bound(bound_decl) => Some(bound_decl),
+            _ => None,
+        }
+    }
+
     /// Unwraps the declaration into a bound declaration.
     ///
     /// # Panics
@@ -722,6 +951,30 @@ impl Decl {
         }
     }
 
+    /// Attempts to get a reference to the inner [`UnboundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Unbound`], then a reference to the inner
+    ///   [`UnboundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_unbound_decl(&self) -> Option<&UnboundDecl> {
+        match self {
+            Self::Unbound(unbound_decl) => Some(unbound_decl),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`UnboundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Unbound`], then the inner [`UnboundDecl`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_unbound_decl(self) -> Option<UnboundDecl> {
+        match self {
+            Self::Unbound(unbound_decl) => Some(unbound_decl),
+            _ => None,
+        }
+    }
+
     /// Unwraps the declaration into an unbound declaration.
     ///
     /// # Panics
@@ -733,34 +986,23 @@ impl Decl {
             _ => panic!("not an unbound declaration"),
         }
     }
-}
-
-impl AstNode for Decl {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        kind == SyntaxKind::BoundDeclNode || kind == SyntaxKind::UnboundDeclNode
-    }
 
-    fn cast(syntax: SyntaxNode) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        match syntax.kind() {
-            SyntaxKind::BoundDeclNode => Some(Self::Bound(BoundDecl(syntax))),
-            SyntaxKind::UnboundDeclNode => Some(Self::Unbound(UnboundDecl(syntax))),
-            _ => None,
-        }
+    /// Finds the first child that can be cast to a [`Decl`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`Decl`] to implement
+    /// the `AstNode` trait.
+ pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+ syntax.children().find_map(Self::cast)
 }
 
- fn syntax(&self) -> &SyntaxNode {
- match self {
- Self::Bound(b) => &b.0,
- Self::Unbound(u) => &u.0,
- }
+ /// Finds all children that can be cast to a [`Decl`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::children`] without requiring [`Decl`] to
+ /// implement the `AstNode` trait.
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Decl> {
+ syntax.children().filter_map(Self::cast)
 }
 }
 
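Note: with `Type`, `Decl`, and (below) `Expr` no longer implementing `AstNode`, the inherent `child`/`children` helpers stand in for `rowan::ast::support::child`/`children`. A hedged sketch of the equivalence, spelled out against `BoundDecl::ty` from above (it assumes `BoundDecl` still exposes a `syntax` accessor via `AstNode`; the free function is hypothetical):

    // What `BoundDecl::ty` now does under the hood: the first child
    // node that casts to a `Type` is the declaration's type.
    fn ty_of(decl: &BoundDecl) -> Option<Type> {
        Type::child(decl.syntax())
    }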
diff --git a/wdl-ast/src/v1/expr.rs b/wdl-ast/src/v1/expr.rs
index 3ed1c545f..2c9cdc491 100644
--- a/wdl-ast/src/v1/expr.rs
+++ b/wdl-ast/src/v1/expr.rs
@@ -13,6 +13,7 @@ use crate::support;
 use crate::support::child;
 use crate::support::children;
 use crate::token;
+use crate::token_child;
 
 /// Represents an expression.
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -66,24 +67,172 @@ pub enum Expr {
 }
 
 impl Expr {
- /// Attempts to reference a literal expression.
- ///
- /// - If the value is a literal expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+ /// underlying members within the [`Expr`].
+ pub fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ if LiteralExpr::can_cast(kind) {
+ return true;
+ }
+
+ matches!(
+ kind,
+ SyntaxKind::NameRefNode
+ | SyntaxKind::ParenthesizedExprNode
+ | SyntaxKind::IfExprNode
+ | SyntaxKind::LogicalNotExprNode
+ | SyntaxKind::NegationExprNode
+ | SyntaxKind::LogicalOrExprNode
+ | SyntaxKind::LogicalAndExprNode
+ | SyntaxKind::EqualityExprNode
+ | SyntaxKind::InequalityExprNode
+ | SyntaxKind::LessExprNode
+ | SyntaxKind::LessEqualExprNode
+ | SyntaxKind::GreaterExprNode
+ | SyntaxKind::GreaterEqualExprNode
+ | SyntaxKind::AdditionExprNode
+ | SyntaxKind::SubtractionExprNode
+ | SyntaxKind::MultiplicationExprNode
+ | SyntaxKind::DivisionExprNode
+ | SyntaxKind::ModuloExprNode
+ | SyntaxKind::ExponentiationExprNode
+ | SyntaxKind::CallExprNode
+ | SyntaxKind::IndexExprNode
+ | SyntaxKind::AccessExprNode
+ )
+ }
+
+ /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+ /// within the [`Expr`].
+ pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+ if LiteralExpr::can_cast(syntax.kind()) {
+ return Some(Self::Literal(
+ LiteralExpr::cast(syntax).expect("literal expr should cast"),
+ ));
+ }
+
+ match syntax.kind() {
+ SyntaxKind::NameRefNode => Some(Self::Name(
+ NameRef::cast(syntax).expect("name ref should cast"),
+ )),
+ SyntaxKind::ParenthesizedExprNode => Some(Self::Parenthesized(
+ ParenthesizedExpr::cast(syntax).expect("parenthesized expr should cast"),
+ )),
+ SyntaxKind::IfExprNode => {
+ Some(Self::If(IfExpr::cast(syntax).expect("if expr should cast")))
+ }
+ SyntaxKind::LogicalNotExprNode => Some(Self::LogicalNot(
+ LogicalNotExpr::cast(syntax).expect("logical not expr should cast"),
+ )),
+ SyntaxKind::NegationExprNode => Some(Self::Negation(
+ NegationExpr::cast(syntax).expect("negation expr should cast"),
+ )),
+ SyntaxKind::LogicalOrExprNode => Some(Self::LogicalOr(
+ LogicalOrExpr::cast(syntax).expect("logical or expr should cast"),
+ )),
+ SyntaxKind::LogicalAndExprNode => Some(Self::LogicalAnd(
+ LogicalAndExpr::cast(syntax).expect("logical and expr should cast"),
+ )),
+ SyntaxKind::EqualityExprNode => Some(Self::Equality(
+ EqualityExpr::cast(syntax).expect("equality expr should cast"),
+ )),
+ SyntaxKind::InequalityExprNode => Some(Self::Inequality(
+ InequalityExpr::cast(syntax).expect("inequality expr should cast"),
+ )),
+ SyntaxKind::LessExprNode => Some(Self::Less(
+ LessExpr::cast(syntax).expect("less expr should cast"),
+ )),
+ SyntaxKind::LessEqualExprNode => Some(Self::LessEqual(
+ LessEqualExpr::cast(syntax).expect("less equal expr should cast"),
+ )),
+ SyntaxKind::GreaterExprNode => Some(Self::Greater(
+ GreaterExpr::cast(syntax).expect("greater expr should cast"),
+ )),
+ SyntaxKind::GreaterEqualExprNode => Some(Self::GreaterEqual(
+ GreaterEqualExpr::cast(syntax).expect("greater equal expr should cast"),
+ )),
+ SyntaxKind::AdditionExprNode => Some(Self::Addition(
+ AdditionExpr::cast(syntax).expect("addition expr should cast"),
+ )),
+ SyntaxKind::SubtractionExprNode => Some(Self::Subtraction(
+ SubtractionExpr::cast(syntax).expect("subtraction expr should cast"),
+ )),
+ SyntaxKind::MultiplicationExprNode => Some(Self::Multiplication(
+ MultiplicationExpr::cast(syntax).expect("multiplication expr should cast"),
+ )),
+ SyntaxKind::DivisionExprNode => Some(Self::Division(
+ DivisionExpr::cast(syntax).expect("division expr should cast"),
+ )),
+ SyntaxKind::ModuloExprNode => Some(Self::Modulo(
+ ModuloExpr::cast(syntax).expect("modulo expr should cast"),
+ )),
+ SyntaxKind::ExponentiationExprNode => Some(Self::Exponentiation(
+ ExponentiationExpr::cast(syntax).expect("exponentiation expr should cast"),
+ )),
+ SyntaxKind::CallExprNode => Some(Self::Call(
+ CallExpr::cast(syntax).expect("call expr should cast"),
+ )),
+ SyntaxKind::IndexExprNode => Some(Self::Index(
+ IndexExpr::cast(syntax).expect("index expr should cast"),
+ )),
+ SyntaxKind::AccessExprNode => Some(Self::Access(
+ AccessExpr::cast(syntax).expect("access expr should cast"),
+ )),
+ _ => None,
+ }
+ }
+
+ /// Gets a reference to the underlying [`SyntaxNode`].
+ pub fn syntax(&self) -> &SyntaxNode {
+ match self {
+ Expr::Literal(element) => element.syntax(),
+ Expr::Name(element) => element.syntax(),
+ Expr::Parenthesized(element) => element.syntax(),
+ Expr::If(element) => element.syntax(),
+ Expr::LogicalNot(element) => element.syntax(),
+ Expr::Negation(element) => element.syntax(),
+ Expr::LogicalOr(element) => element.syntax(),
+ Expr::LogicalAnd(element) => element.syntax(),
+ Expr::Equality(element) => element.syntax(),
+ Expr::Inequality(element) => element.syntax(),
+ Expr::Less(element) => element.syntax(),
+ Expr::LessEqual(element) => element.syntax(),
+ Expr::Greater(element) => element.syntax(),
+ Expr::GreaterEqual(element) => element.syntax(),
+ Expr::Addition(element) => element.syntax(),
+ Expr::Subtraction(element) => element.syntax(),
+ Expr::Multiplication(element) => element.syntax(),
+ Expr::Division(element) => element.syntax(),
+ Expr::Modulo(element) => element.syntax(),
+ Expr::Exponentiation(element) => element.syntax(),
+ Expr::Call(element) => element.syntax(),
+ Expr::Index(element) => element.syntax(),
+ Expr::Access(element) => element.syntax(),
+ }
+ }
+
+ /// Attempts to get a reference to the inner [`LiteralExpr`].
+ ///
+ /// * If `self` is an [`Expr::Literal`], then a reference to the inner
+ /// [`LiteralExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_literal(&self) -> Option<&LiteralExpr> {
 match self {
- Self::Literal(expr) => Some(expr),
+ Self::Literal(literal) => Some(literal),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a literal expression.
+ /// Consumes `self` and attempts to return the inner [`LiteralExpr`].
 ///
- /// - If the value is a literal expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Literal`], then the inner [`LiteralExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_literal(self) -> Option<LiteralExpr> {
 match self {
- Self::Literal(expr) => Some(expr),
+ Self::Literal(literal) => Some(literal),
 _ => None,
 }
 }
@@ -100,24 +249,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a name reference.
+ /// Attempts to get a reference to the inner [`NameRef`].
 ///
- /// - If the value is a name reference, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Name`], then a reference to the inner
+ /// [`NameRef`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_name_ref(&self) -> Option<&NameRef> {
 match self {
- Self::Name(expr) => Some(expr),
+ Self::Name(name_ref) => Some(name_ref),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a name reference.
+ /// Consumes `self` and attempts to return the inner [`NameRef`].
 ///
- /// - If the value is a name reference, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Name`], then the inner [`NameRef`] is returned
+ /// wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_name_ref(self) -> Option<NameRef> {
 match self {
- Self::Name(expr) => Some(expr),
+ Self::Name(name_ref) => Some(name_ref),
 _ => None,
 }
 }
@@ -134,24 +285,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a parenthesized expression.
+ /// Attempts to get a reference to the inner [`ParenthesizedExpr`].
 ///
- /// - If the value is a parenthesized expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Parenthesized`], then a reference to the inner
+ /// [`ParenthesizedExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_parenthesized(&self) -> Option<&ParenthesizedExpr> {
 match self {
- Self::Parenthesized(expr) => Some(expr),
+ Self::Parenthesized(parenthesized) => Some(parenthesized),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a parenthesized expression.
+ /// Consumes `self` and attempts to return the inner [`ParenthesizedExpr`].
 ///
- /// - If the value is a parenthesized expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Parenthesized`], then the inner
+ /// [`ParenthesizedExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_parenthesized(self) -> Option<ParenthesizedExpr> {
 match self {
- Self::Parenthesized(expr) => Some(expr),
+ Self::Parenthesized(parenthesized) => Some(parenthesized),
 _ => None,
 }
 }
@@ -168,24 +321,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an `if` expression.
+ /// Attempts to get a reference to the inner [`IfExpr`].
 ///
- /// - If the value is an `if` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::If`], then a reference to the inner [`IfExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_if(&self) -> Option<&IfExpr> {
 match self {
- Self::If(expr) => Some(expr),
+ Self::If(r#if) => Some(r#if),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an `if` expression.
+ /// Consumes `self` and attempts to return the inner [`IfExpr`].
 ///
- /// - If the value is an `if` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::If`], then the inner [`IfExpr`] is returned
+ /// wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_if(self) -> Option<IfExpr> {
 match self {
- Self::If(expr) => Some(expr),
+ Self::If(r#if) => Some(r#if),
 _ => None,
 }
 }
@@ -202,24 +357,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a logical `not` expression.
+ /// Attempts to get a reference to the inner [`LogicalNotExpr`].
 ///
- /// - If the value is a logical `not` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalNot`], then a reference to the inner
+ /// [`LogicalNotExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_logical_not(&self) -> Option<&LogicalNotExpr> {
 match self {
- Self::LogicalNot(expr) => Some(expr),
+ Self::LogicalNot(logical_not) => Some(logical_not),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a logical `not` expression.
+ /// Consumes `self` and attempts to return the inner [`LogicalNotExpr`].
 ///
- /// - If the value is a logical `not` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalNot`], then the inner [`LogicalNotExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_logical_not(self) -> Option<LogicalNotExpr> {
 match self {
- Self::LogicalNot(expr) => Some(expr),
+ Self::LogicalNot(logical_not) => Some(logical_not),
 _ => None,
 }
 }
@@ -236,24 +393,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a negation expression.
+ /// Attempts to get a reference to the inner [`NegationExpr`].
 ///
- /// - If the value is a negation expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Negation`], then a reference to the inner
+ /// [`NegationExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_negation(&self) -> Option<&NegationExpr> {
 match self {
- Self::Negation(expr) => Some(expr),
+ Self::Negation(negation) => Some(negation),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a negation expression.
+ /// Consumes `self` and attempts to return the inner [`NegationExpr`].
 ///
- /// - If the value is a negation expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Negation`], then the inner [`NegationExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_negation(self) -> Option<NegationExpr> {
 match self {
- Self::Negation(expr) => Some(expr),
+ Self::Negation(negation) => Some(negation),
 _ => None,
 }
 }
@@ -270,24 +429,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a logical `or` expression.
+ /// Attempts to get a reference to the inner [`LogicalOrExpr`].
 ///
- /// - If the value is a logical `or` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalOr`], then a reference to the inner
+ /// [`LogicalOrExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_logical_or(&self) -> Option<&LogicalOrExpr> {
 match self {
- Self::LogicalOr(expr) => Some(expr),
+ Self::LogicalOr(logical_or) => Some(logical_or),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a logical `or` expression.
+ /// Consumes `self` and attempts to return the inner [`LogicalOrExpr`].
 ///
- /// - If the value is a logical `or` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalOr`], then the inner [`LogicalOrExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_logical_or(self) -> Option<LogicalOrExpr> {
 match self {
- Self::LogicalOr(expr) => Some(expr),
+ Self::LogicalOr(logical_or) => Some(logical_or),
 _ => None,
 }
 }
@@ -304,24 +465,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a logical `and` expression.
+ /// Attempts to get a reference to the inner [`LogicalAndExpr`].
 ///
- /// - If the value is a logical `and` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalAnd`], then a reference to the inner
+ /// [`LogicalAndExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_logical_and(&self) -> Option<&LogicalAndExpr> {
 match self {
- Self::LogicalAnd(expr) => Some(expr),
+ Self::LogicalAnd(logical_and) => Some(logical_and),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a logical `and` expression.
+ /// Consumes `self` and attempts to return the inner [`LogicalAndExpr`].
 ///
- /// - If the value is a logical `and` expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LogicalAnd`], then the inner [`LogicalAndExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_logical_and(self) -> Option<LogicalAndExpr> {
 match self {
- Self::LogicalAnd(expr) => Some(expr),
+ Self::LogicalAnd(logical_and) => Some(logical_and),
 _ => None,
 }
 }
@@ -338,24 +501,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an equality expression.
+ /// Attempts to get a reference to the inner [`EqualityExpr`].
 ///
- /// - If the value is an equality expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Equality`], then a reference to the inner
+ /// [`EqualityExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_equality(&self) -> Option<&EqualityExpr> {
 match self {
- Self::Equality(expr) => Some(expr),
+ Self::Equality(equality) => Some(equality),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an equality expression.
+ /// Consumes `self` and attempts to return the inner [`EqualityExpr`].
 ///
- /// - If the value is an equality expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Equality`], then the inner [`EqualityExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_equality(self) -> Option<EqualityExpr> {
 match self {
- Self::Equality(expr) => Some(expr),
+ Self::Equality(equality) => Some(equality),
 _ => None,
 }
 }
@@ -372,24 +537,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an inequality expression.
+ /// Attempts to get a reference to the inner [`InequalityExpr`].
 ///
- /// - If the value is an inequality expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Inequality`], then a reference to the inner
+ /// [`InequalityExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_inequality(&self) -> Option<&InequalityExpr> {
 match self {
- Self::Inequality(expr) => Some(expr),
+ Self::Inequality(inequality) => Some(inequality),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an inequality expression.
+ /// Consumes `self` and attempts to return the inner [`InequalityExpr`].
 ///
- /// - If the value is an inequality expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Inequality`], then the inner [`InequalityExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_inequality(self) -> Option<InequalityExpr> {
 match self {
- Self::Inequality(expr) => Some(expr),
+ Self::Inequality(inequality) => Some(inequality),
 _ => None,
 }
 }
@@ -406,24 +573,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a "less than" expression.
+ /// Attempts to get a reference to the inner [`LessExpr`].
 ///
- /// - If the value is a "less than" expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Less`], then a reference to the inner
+ /// [`LessExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_less(&self) -> Option<&LessExpr> {
 match self {
- Self::Less(expr) => Some(expr),
+ Self::Less(less) => Some(less),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a "less than" expression.
+ /// Consumes `self` and attempts to return the inner [`LessExpr`].
 ///
- /// - If the value is a "less than" expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Less`], then the inner [`LessExpr`] is returned
+ /// wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_less(self) -> Option<LessExpr> {
 match self {
- Self::Less(expr) => Some(expr),
+ Self::Less(less) => Some(less),
 _ => None,
 }
 }
@@ -440,27 +609,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a "less than or equal to" expression.
+ /// Attempts to get a reference to the inner [`LessEqualExpr`].
 ///
- /// - If the value is a "less than or equal to" expression, `Some()` is
- /// returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LessEqual`], then a reference to the inner
+ /// [`LessEqualExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_less_equal(&self) -> Option<&LessEqualExpr> {
 match self {
- Self::LessEqual(expr) => Some(expr),
+ Self::LessEqual(less_equal) => Some(less_equal),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a "less than or equal to"
- /// expression.
+ /// Consumes `self` and attempts to return the inner [`LessEqualExpr`].
 ///
- /// - If the value is a "less than or equal to" expression, `Some()` is
- /// returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::LessEqual`], then the inner [`LessEqualExpr`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_less_equal(self) -> Option<LessEqualExpr> {
 match self {
- Self::LessEqual(expr) => Some(expr),
+ Self::LessEqual(less_equal) => Some(less_equal),
 _ => None,
 }
 }
@@ -477,24 +645,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a "greater than" expression.
+ /// Attempts to get a reference to the inner [`GreaterExpr`].
 ///
- /// - If the value is a "greater than" expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Greater`], then a reference to the inner
+ /// [`GreaterExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_greater(&self) -> Option<&GreaterExpr> {
 match self {
- Self::Greater(expr) => Some(expr),
+ Self::Greater(greater) => Some(greater),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a "greater than" expression.
+ /// Consumes `self` and attempts to return the inner [`GreaterExpr`].
 ///
- /// - If the value is a "greater than" expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Greater`], then the inner [`GreaterExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_greater(self) -> Option<GreaterExpr> {
 match self {
- Self::Greater(expr) => Some(expr),
+ Self::Greater(greater) => Some(greater),
 _ => None,
 }
 }
@@ -511,27 +681,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a "greater than or equal to" expression.
+ /// Attempts to get a reference to the inner [`GreaterEqualExpr`].
 ///
- /// - If the value is a "greater than or equal to" expression, `Some()` is
- /// returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::GreaterEqual`], then a reference to the inner
+ /// [`GreaterEqualExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_greater_equal(&self) -> Option<&GreaterEqualExpr> {
 match self {
- Self::GreaterEqual(expr) => Some(expr),
+ Self::GreaterEqual(greater_equal) => Some(greater_equal),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a "greater than or equal to"
- /// expression.
+ /// Consumes `self` and attempts to return the inner [`GreaterEqualExpr`].
 ///
- /// - If the value is a "greater than or equal to" expression, `Some()` is
- /// returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::GreaterEqual`], then the inner
+ /// [`GreaterEqualExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_greater_equal(self) -> Option<GreaterEqualExpr> {
 match self {
- Self::GreaterEqual(expr) => Some(expr),
+ Self::GreaterEqual(greater_equal) => Some(greater_equal),
 _ => None,
 }
 }
@@ -548,24 +717,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an addition expression.
+ /// Attempts to get a reference to the inner [`AdditionExpr`].
 ///
- /// - If the value is an addition expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Addition`], then a reference to the inner
+ /// [`AdditionExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_addition(&self) -> Option<&AdditionExpr> {
 match self {
- Self::Addition(expr) => Some(expr),
+ Self::Addition(addition) => Some(addition),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an addition expression.
+ /// Consumes `self` and attempts to return the inner [`AdditionExpr`].
 ///
- /// - If the value is an addition expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Addition`], then the inner [`AdditionExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_addition(self) -> Option<AdditionExpr> {
 match self {
- Self::Addition(expr) => Some(expr),
+ Self::Addition(addition) => Some(addition),
 _ => None,
 }
 }
@@ -582,24 +753,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a subtraction expression.
+ /// Attempts to get a reference to the inner [`SubtractionExpr`].
 ///
- /// - If the value is a subtraction expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Subtraction`], then a reference to the inner
+ /// [`SubtractionExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_subtraction(&self) -> Option<&SubtractionExpr> {
 match self {
- Self::Subtraction(expr) => Some(expr),
+ Self::Subtraction(subtraction) => Some(subtraction),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a subtraction expression.
+ /// Consumes `self` and attempts to return the inner [`SubtractionExpr`].
 ///
- /// - If the value is a subtraction expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Subtraction`], then the inner
+ /// [`SubtractionExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_subtraction(self) -> Option<SubtractionExpr> {
 match self {
- Self::Subtraction(expr) => Some(expr),
+ Self::Subtraction(subtraction) => Some(subtraction),
 _ => None,
 }
 }
@@ -616,24 +789,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a multiplication expression.
+ /// Attempts to get a reference to the inner [`MultiplicationExpr`].
 ///
- /// - If the value is a multiplication expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Multiplication`], then a reference to the inner
+ /// [`MultiplicationExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_multiplication(&self) -> Option<&MultiplicationExpr> {
 match self {
- Self::Multiplication(expr) => Some(expr),
+ Self::Multiplication(multiplication) => Some(multiplication),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a multiplication expression.
+ /// Consumes `self` and attempts to return the inner [`MultiplicationExpr`].
 ///
- /// - If the value is a multiplication expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Multiplication`], then the inner
+ /// [`MultiplicationExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_multiplication(self) -> Option<MultiplicationExpr> {
 match self {
- Self::Multiplication(expr) => Some(expr),
+ Self::Multiplication(multiplication) => Some(multiplication),
 _ => None,
 }
 }
@@ -650,24 +825,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a division expression.
+ /// Attempts to get a reference to the inner [`DivisionExpr`].
 ///
- /// - If the value is a division expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Division`], then a reference to the inner
+ /// [`DivisionExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_division(&self) -> Option<&DivisionExpr> {
 match self {
- Self::Division(expr) => Some(expr),
+ Self::Division(division) => Some(division),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a division expression.
+ /// Consumes `self` and attempts to return the inner [`DivisionExpr`].
 ///
- /// - If the value is a division expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Division`], then the inner [`DivisionExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_division(self) -> Option<DivisionExpr> {
 match self {
- Self::Division(expr) => Some(expr),
+ Self::Division(division) => Some(division),
 _ => None,
 }
 }
@@ -684,24 +861,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a modulo expression.
+ /// Attempts to get a reference to the inner [`ModuloExpr`].
 ///
- /// - If the value is a modulo expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Modulo`], then a reference to the inner
+ /// [`ModuloExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_modulo(&self) -> Option<&ModuloExpr> {
 match self {
- Self::Modulo(expr) => Some(expr),
+ Self::Modulo(modulo) => Some(modulo),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a modulo expression.
+ /// Consumes `self` and attempts to return the inner [`ModuloExpr`].
 ///
- /// - If the value is a modulo expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Modulo`], then the inner [`ModuloExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_modulo(self) -> Option<ModuloExpr> {
 match self {
- Self::Modulo(expr) => Some(expr),
+ Self::Modulo(modulo) => Some(modulo),
 _ => None,
 }
 }
@@ -718,24 +897,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an exponentiation expression.
+ /// Attempts to get a reference to the inner [`ExponentiationExpr`].
 ///
- /// - If the value is an exponentiation expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Exponentiation`], then a reference to the inner
+ /// [`ExponentiationExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_exponentiation(&self) -> Option<&ExponentiationExpr> {
 match self {
- Self::Exponentiation(expr) => Some(expr),
+ Self::Exponentiation(exponentiation) => Some(exponentiation),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an exponentiation expression.
+ /// Consumes `self` and attempts to return the inner [`ExponentiationExpr`].
 ///
- /// - If the value is an exponentiation expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Exponentiation`], then the inner
+ /// [`ExponentiationExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_exponentiation(self) -> Option<ExponentiationExpr> {
 match self {
- Self::Exponentiation(expr) => Some(expr),
+ Self::Exponentiation(exponentiation) => Some(exponentiation),
 _ => None,
 }
 }
@@ -752,24 +933,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference a call expression.
+ /// Attempts to get a reference to the inner [`CallExpr`].
 ///
- /// - If the value is a call expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Call`], then a reference to the inner
+ /// [`CallExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_call(&self) -> Option<&CallExpr> {
 match self {
- Self::Call(expr) => Some(expr),
+ Self::Call(call) => Some(call),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return a call expression.
+ /// Consumes `self` and attempts to return the inner [`CallExpr`].
 ///
- /// - If the value is a call expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Call`], then the inner [`CallExpr`] is returned
+ /// wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_call(self) -> Option<CallExpr> {
 match self {
- Self::Call(expr) => Some(expr),
+ Self::Call(call) => Some(call),
 _ => None,
 }
 }
@@ -786,24 +969,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an index expression.
+ /// Attempts to get a reference to the inner [`IndexExpr`].
 ///
- /// - If the value is an index expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Index`], then a reference to the inner
+ /// [`IndexExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_index(&self) -> Option<&IndexExpr> {
 match self {
- Self::Index(expr) => Some(expr),
+ Self::Index(index) => Some(index),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an index expression.
+ /// Consumes `self` and attempts to return the inner [`IndexExpr`].
 ///
- /// - If the value is an index expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Index`], then the inner [`IndexExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_index(self) -> Option<IndexExpr> {
 match self {
- Self::Index(expr) => Some(expr),
+ Self::Index(index) => Some(index),
 _ => None,
 }
 }
@@ -820,24 +1005,26 @@ impl Expr {
 }
 }
 
- /// Attempts to reference an access expression.
+ /// Attempts to get a reference to the inner [`AccessExpr`].
 ///
- /// - If the value is an access expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Access`], then a reference to the inner
+ /// [`AccessExpr`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_access(&self) -> Option<&AccessExpr> {
 match self {
- Self::Access(expr) => Some(expr),
+ Self::Access(access) => Some(access),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return an access expression.
+ /// Consumes `self` and attempts to return the inner [`AccessExpr`].
 ///
- /// - If the value is an access expression, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is an [`Expr::Access`], then the inner [`AccessExpr`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_access(self) -> Option<AccessExpr> {
 match self {
- Self::Access(expr) => Some(expr),
+ Self::Access(access) => Some(access),
 _ => None,
 }
 }
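Note: the accessor pairs above follow one convention throughout: `as_*` borrows, `into_*` consumes, and `unwrap_*` panics. A short hedged sketch of the cast-then-narrow pattern they enable (the `node` variable is an assumption; it would be a `SyntaxNode` obtained from a parsed WDL document):

    // Cast a raw syntax node to an expression, then narrow it.
    if let Some(expr) = Expr::cast(node) {
        if let Some(call) = expr.as_call() {
            // `call` is a `&CallExpr`; for example, count its arguments.
            let _arity = call.arguments().count();
        }
    }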
@@ -853,6 +1040,24 @@ impl Expr {
 _ => panic!("not an access expression"),
 }
 }
+
+ /// Finds the first child that can be cast to an [`Expr`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::child`] without requiring [`Expr`] to implement
+ /// the `AstNode` trait.
+ pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+ syntax.children().find_map(Self::cast)
+ }
+
+ /// Finds all children that can be cast to an [`Expr`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::children`] without requiring [`Expr`] to
+ /// implement the `AstNode` trait.
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Expr> {
+ syntax.children().filter_map(Self::cast)
+ }
 }
 
 impl AstNode for Expr {
@@ -995,25 +1200,116 @@ pub enum LiteralExpr {
 }
 
 impl LiteralExpr {
- /// Attempts to reference the expression as a literal boolean.
- ///
- /// - If the value is a literal boolean, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Returns whether or not a [`SyntaxKind`] is able to be cast
+ /// to any of the underlying members within the [`LiteralExpr`].
+ pub fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(
+ kind,
+ SyntaxKind::LiteralBooleanNode
+ | SyntaxKind::LiteralIntegerNode
+ | SyntaxKind::LiteralFloatNode
+ | SyntaxKind::LiteralStringNode
+ | SyntaxKind::LiteralArrayNode
+ | SyntaxKind::LiteralPairNode
+ | SyntaxKind::LiteralMapNode
+ | SyntaxKind::LiteralObjectNode
+ | SyntaxKind::LiteralStructNode
+ | SyntaxKind::LiteralNoneNode
+ | SyntaxKind::LiteralHintsNode
+ | SyntaxKind::LiteralInputNode
+ | SyntaxKind::LiteralOutputNode
+ )
+ }
+
+ /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+ /// within the [`LiteralExpr`].
+ pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+ match syntax.kind() {
+ SyntaxKind::LiteralBooleanNode => Some(Self::Boolean(
+ LiteralBoolean::cast(syntax).expect("literal boolean to cast"),
+ )),
+ SyntaxKind::LiteralIntegerNode => Some(Self::Integer(
+ LiteralInteger::cast(syntax).expect("literal integer to cast"),
+ )),
+ SyntaxKind::LiteralFloatNode => Some(Self::Float(
+ LiteralFloat::cast(syntax).expect("literal float to cast"),
+ )),
+ SyntaxKind::LiteralStringNode => Some(Self::String(
+ LiteralString::cast(syntax).expect("literal string to cast"),
+ )),
+ SyntaxKind::LiteralArrayNode => Some(Self::Array(
+ LiteralArray::cast(syntax).expect("literal array to cast"),
+ )),
+ SyntaxKind::LiteralPairNode => Some(Self::Pair(
+ LiteralPair::cast(syntax).expect("literal pair to cast"),
+ )),
+ SyntaxKind::LiteralMapNode => Some(Self::Map(
+ LiteralMap::cast(syntax).expect("literal map to cast"),
+ )),
+ SyntaxKind::LiteralObjectNode => Some(Self::Object(
+ LiteralObject::cast(syntax).expect("literal object to cast"),
+ )),
+ SyntaxKind::LiteralStructNode => Some(Self::Struct(
+ LiteralStruct::cast(syntax).expect("literal struct to cast"),
+ )),
+ SyntaxKind::LiteralNoneNode => Some(Self::None(
+ LiteralNone::cast(syntax).expect("literal none to cast"),
+ )),
+ SyntaxKind::LiteralHintsNode => Some(Self::Hints(
+ LiteralHints::cast(syntax).expect("literal hints to cast"),
+ )),
+ SyntaxKind::LiteralInputNode => Some(Self::Input(
+ LiteralInput::cast(syntax).expect("literal input to cast"),
+ )),
+ SyntaxKind::LiteralOutputNode => Some(Self::Output(
+ LiteralOutput::cast(syntax).expect("literal output to cast"),
+ )),
+ _ => None,
+ }
+ }
+
+ /// Gets a reference to the underlying [`SyntaxNode`].
+ pub fn syntax(&self) -> &SyntaxNode {
+ match self {
+ Self::Boolean(element) => element.syntax(),
+ Self::Integer(element) => element.syntax(),
+ Self::Float(element) => element.syntax(),
+ Self::String(element) => element.syntax(),
+ Self::Array(element) => element.syntax(),
+ Self::Pair(element) => element.syntax(),
+ Self::Map(element) => element.syntax(),
+ Self::Object(element) => element.syntax(),
+ Self::Struct(element) => element.syntax(),
+ Self::None(element) => element.syntax(),
+ Self::Hints(element) => element.syntax(),
+ Self::Input(element) => element.syntax(),
+ Self::Output(element) => element.syntax(),
+ }
+ }
+
+ /// Attempts to get a reference to the inner [`LiteralBoolean`].
+ ///
+ /// * If `self` is a [`LiteralExpr::Boolean`], then a reference to the inner
+ /// [`LiteralBoolean`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_boolean(&self) -> Option<&LiteralBoolean> {
 match self {
- Self::Boolean(literal) => Some(literal),
+ Self::Boolean(boolean) => Some(boolean),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// boolean.
+ /// Consumes `self` and attempts to return the inner [`LiteralBoolean`].
 ///
- /// - If the value is a literal boolean, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Boolean`], then the inner
+ /// [`LiteralBoolean`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_boolean(self) -> Option<LiteralBoolean> {
 match self {
- Self::Boolean(literal) => Some(literal),
+ Self::Boolean(boolean) => Some(boolean),
 _ => None,
 }
 }
@@ -1030,25 +1326,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal integer.
+ /// Attempts to get a reference to the inner [`LiteralInteger`].
 ///
- /// - If the value is a literal integer, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Integer`], then a reference to the inner
+ /// [`LiteralInteger`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_integer(&self) -> Option<&LiteralInteger> {
 match self {
- Self::Integer(literal) => Some(literal),
+ Self::Integer(integer) => Some(integer),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// integer.
+ /// Consumes `self` and attempts to return the inner [`LiteralInteger`].
 ///
- /// - If the value is a literal integer, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Integer`], then the inner
+ /// [`LiteralInteger`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_integer(self) -> Option<LiteralInteger> {
 match self {
- Self::Integer(literal) => Some(literal),
+ Self::Integer(integer) => Some(integer),
 _ => None,
 }
 }
@@ -1065,25 +1362,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal float.
+ /// Attempts to get a reference to the inner [`LiteralFloat`].
 ///
- /// - If the value is a literal float, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Float`], then a reference to the inner
+ /// [`LiteralFloat`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_float(&self) -> Option<&LiteralFloat> {
 match self {
- Self::Float(literal) => Some(literal),
+ Self::Float(float) => Some(float),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// float.
+ /// Consumes `self` and attempts to return the inner [`LiteralFloat`].
 ///
- /// - If the value is a literal float, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Float`], then the inner [`LiteralFloat`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_float(self) -> Option<LiteralFloat> {
 match self {
- Self::Float(literal) => Some(literal),
+ Self::Float(float) => Some(float),
 _ => None,
 }
 }
@@ -1100,25 +1398,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal string.
+ /// Attempts to get a reference to the inner [`LiteralString`].
 ///
- /// - If the value is a literal string, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::String`], then a reference to the inner
+ /// [`LiteralString`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_string(&self) -> Option<&LiteralString> {
 match self {
- Self::String(literal) => Some(literal),
+ Self::String(string) => Some(string),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// string.
+ /// Consumes `self` and attempts to return the inner [`LiteralString`].
 ///
- /// - If the value is a literal string, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::String`], then the inner
+ /// [`LiteralString`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_string(self) -> Option<LiteralString> {
 match self {
- Self::String(literal) => Some(literal),
+ Self::String(string) => Some(string),
 _ => None,
 }
 }
@@ -1135,25 +1434,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal array.
+ /// Attempts to get a reference to the inner [`LiteralArray`].
 ///
- /// - If the value is a literal array, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Array`], then a reference to the inner
+ /// [`LiteralArray`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_array(&self) -> Option<&LiteralArray> {
 match self {
- Self::Array(literal) => Some(literal),
+ Self::Array(array) => Some(array),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// array.
+ /// Consumes `self` and attempts to return the inner [`LiteralArray`].
 ///
- /// - If the value is a literal array, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Array`], then the inner [`LiteralArray`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_array(self) -> Option<LiteralArray> {
 match self {
- Self::Array(literal) => Some(literal),
+ Self::Array(array) => Some(array),
 _ => None,
 }
 }
@@ -1170,24 +1470,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal pair.
+ /// Attempts to get a reference to the inner [`LiteralPair`].
 ///
- /// - If the value is a literal pair, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Pair`], then a reference to the inner
+ /// [`LiteralPair`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_pair(&self) -> Option<&LiteralPair> {
 match self {
- Self::Pair(literal) => Some(literal),
+ Self::Pair(pair) => Some(pair),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal pair.
+ /// Consumes `self` and attempts to return the inner [`LiteralPair`].
 ///
- /// - If the value is a literal pair, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Pair`], then the inner [`LiteralPair`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_pair(self) -> Option<LiteralPair> {
 match self {
- Self::Pair(literal) => Some(literal),
+ Self::Pair(pair) => Some(pair),
 _ => None,
 }
 }
@@ -1204,24 +1506,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal map.
+ /// Attempts to get a reference to the inner [`LiteralMap`].
 ///
- /// - If the value is a literal map, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Map`], then a reference to the inner
+ /// [`LiteralMap`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_map(&self) -> Option<&LiteralMap> {
 match self {
- Self::Map(literal) => Some(literal),
+ Self::Map(map) => Some(map),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal map.
+ /// Consumes `self` and attempts to return the inner [`LiteralMap`].
 ///
- /// - If the value is a literal map, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Map`], then the inner [`LiteralMap`] is
+ /// returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_map(self) -> Option<LiteralMap> {
 match self {
- Self::Map(literal) => Some(literal),
+ Self::Map(map) => Some(map),
 _ => None,
 }
 }
@@ -1238,25 +1542,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal object.
+ /// Attempts to get a reference to the inner [`LiteralObject`].
 ///
- /// - If the value is a literal object, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Object`], then a reference to the inner
+ /// [`LiteralObject`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_object(&self) -> Option<&LiteralObject> {
 match self {
- Self::Object(literal) => Some(literal),
+ Self::Object(object) => Some(object),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// object.
+ /// Consumes `self` and attempts to return the inner [`LiteralObject`].
 ///
- /// - If the value is a literal object, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Object`], then the inner
+ /// [`LiteralObject`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_object(self) -> Option<LiteralObject> {
 match self {
- Self::Object(literal) => Some(literal),
+ Self::Object(object) => Some(object),
 _ => None,
 }
 }
@@ -1273,25 +1578,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal struct.
+ /// Attempts to get a reference to the inner [`LiteralStruct`].
 ///
- /// - If the value is a literal struct, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Struct`], then a reference to the inner
+ /// [`LiteralStruct`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_struct(&self) -> Option<&LiteralStruct> {
 match self {
- Self::Struct(literal) => Some(literal),
+ Self::Struct(r#struct) => Some(r#struct),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// struct.
+ /// Consumes `self` and attempts to return the inner [`LiteralStruct`].
 ///
- /// - If the value is a literal struct, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Struct`], then the inner
+ /// [`LiteralStruct`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_struct(self) -> Option<LiteralStruct> {
 match self {
- Self::Struct(literal) => Some(literal),
+ Self::Struct(r#struct) => Some(r#struct),
 _ => None,
 }
 }
@@ -1308,25 +1614,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal `None`.
+ /// Attempts to get a reference to the inner [`LiteralNone`].
 ///
- /// - If the value is a literal `None`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::None`], then a reference to the inner
+ /// [`LiteralNone`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_none(&self) -> Option<&LiteralNone> {
 match self {
- Self::None(literal) => Some(literal),
+ Self::None(none) => Some(none),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// `None`.
+ /// Consumes `self` and attempts to return the inner [`LiteralNone`].
 ///
- /// - If the value is a literal `None`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::None`], then the inner [`LiteralNone`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_none(self) -> Option<LiteralNone> {
 match self {
- Self::None(literal) => Some(literal),
+ Self::None(none) => Some(none),
 _ => None,
 }
 }
@@ -1343,25 +1650,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal `hints`.
+ /// Attempts to get a reference to the inner [`LiteralHints`].
 ///
- /// - If the value is a literal `hints`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Hints`], then a reference to the inner
+ /// [`LiteralHints`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_hints(&self) -> Option<&LiteralHints> {
 match self {
- Self::Hints(literal) => Some(literal),
+ Self::Hints(hints) => Some(hints),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// `hints`.
+ /// Consumes `self` and attempts to return the inner [`LiteralHints`].
 ///
- /// - If the value is a literal `hints`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Hints`], then the inner [`LiteralHints`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_hints(self) -> Option<LiteralHints> {
 match self {
- Self::Hints(literal) => Some(literal),
+ Self::Hints(hints) => Some(hints),
 _ => None,
 }
 }
@@ -1378,25 +1686,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal `input`.
-
- /// - If the value is a literal `input`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Attempts to get a reference to the inner [`LiteralInput`].
+ ///
+ /// * If `self` is a [`LiteralExpr::Input`], then a reference to the inner
+ /// [`LiteralInput`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_input(&self) -> Option<&LiteralInput> {
 match self {
- Self::Input(literal) => Some(literal),
+ Self::Input(input) => Some(input),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// `input`.
-
- /// - If the value is a literal `input`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Consumes `self` and attempts to return the inner [`LiteralInput`].
+ ///
+ /// * If `self` is a [`LiteralExpr::Input`], then the inner [`LiteralInput`]
+ /// is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_input(self) -> Option<LiteralInput> {
 match self {
- Self::Input(literal) => Some(literal),
+ Self::Input(input) => Some(input),
 _ => None,
 }
 }
@@ -1413,25 +1722,26 @@ impl LiteralExpr {
 }
 }
 
- /// Attempts to reference the expression as a literal `output`.
-
- /// - If the value is a literal `output`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Attempts to get a reference to the inner [`LiteralOutput`].
+ ///
+ /// * If `self` is a [`LiteralExpr::Output`], then a reference to the inner
+ /// [`LiteralOutput`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn as_output(&self) -> Option<&LiteralOutput> {
 match self {
- Self::Output(literal) => Some(literal),
+ Self::Output(output) => Some(output),
 _ => None,
 }
 }
 
- /// Consumes `self` and attempts to return the expression as a literal
- /// `output`.
-
- /// - If the value is a literal `output`, `Some()` is returned.
- /// - Else, `None` is returned.
+ /// Consumes `self` and attempts to return the inner [`LiteralOutput`].
+ ///
+ /// * If `self` is a [`LiteralExpr::Output`], then the inner
+ /// [`LiteralOutput`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
 pub fn into_output(self) -> Option<LiteralOutput> {
 match self {
- Self::Output(literal) => Some(literal),
+ Self::Output(output) => Some(output),
 _ => None,
 }
 }
@@ -1447,71 +1757,23 @@ impl LiteralExpr {
 _ => panic!("not a literal `output`"),
 }
 }
-}
-
-impl AstNode for LiteralExpr {
- type Language = WorkflowDescriptionLanguage;
-
- fn can_cast(kind: SyntaxKind) -> bool
- where
- Self: Sized,
- {
- matches!(
- kind,
- SyntaxKind::LiteralBooleanNode
- | SyntaxKind::LiteralIntegerNode
- | SyntaxKind::LiteralFloatNode
- | SyntaxKind::LiteralStringNode
- | SyntaxKind::LiteralArrayNode
- | SyntaxKind::LiteralPairNode
- | SyntaxKind::LiteralMapNode
- | SyntaxKind::LiteralObjectNode
- | SyntaxKind::LiteralStructNode
- | SyntaxKind::LiteralNoneNode
- | SyntaxKind::LiteralHintsNode
- | SyntaxKind::LiteralInputNode
- | SyntaxKind::LiteralOutputNode
- )
- }
-
- fn cast(syntax: SyntaxNode) -> Option<Self>
- where
- Self: Sized,
- {
- match syntax.kind() {
- SyntaxKind::LiteralBooleanNode => Some(Self::Boolean(LiteralBoolean(syntax))),
- SyntaxKind::LiteralIntegerNode => Some(Self::Integer(LiteralInteger(syntax))),
- SyntaxKind::LiteralFloatNode => Some(Self::Float(LiteralFloat(syntax))),
- SyntaxKind::LiteralStringNode => Some(Self::String(LiteralString(syntax))),
- SyntaxKind::LiteralArrayNode => Some(Self::Array(LiteralArray(syntax))),
- SyntaxKind::LiteralPairNode => Some(Self::Pair(LiteralPair(syntax))),
- SyntaxKind::LiteralMapNode => Some(Self::Map(LiteralMap(syntax))),
- SyntaxKind::LiteralObjectNode => Some(Self::Object(LiteralObject(syntax))),
- SyntaxKind::LiteralStructNode => Some(Self::Struct(LiteralStruct(syntax))),
- SyntaxKind::LiteralNoneNode => Some(Self::None(LiteralNone(syntax))),
- SyntaxKind::LiteralHintsNode => Some(Self::Hints(LiteralHints(syntax))),
- SyntaxKind::LiteralInputNode => Some(Self::Input(LiteralInput(syntax))),
- SyntaxKind::LiteralOutputNode => Some(Self::Output(LiteralOutput(syntax))),
- _ => None,
- }
+ /// Finds the first child that can be cast to a [`LiteralExpr`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::child`] without requiring [`LiteralExpr`] to
+ /// implement the `AstNode` trait.
+ pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+ syntax.children().find_map(Self::cast)
 }
 
- fn syntax(&self) -> &SyntaxNode {
- match self {
- Self::Boolean(b) => &b.0,
- Self::Integer(i) => &i.0,
- Self::Float(f) => &f.0,
- Self::String(s) => &s.0,
- Self::Array(a) => &a.0,
- Self::Pair(p) => &p.0,
- Self::Map(m) => &m.0,
- Self::Object(o) => &o.0,
- Self::Struct(s) => &s.0,
- Self::None(n) => &n.0,
- Self::Hints(h) => &h.0,
- Self::Input(i) => &i.0,
- Self::Output(o) => &o.0,
- }
+ /// Finds all children that can be cast to a [`LiteralExpr`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::children`] without requiring [`LiteralExpr`] to
+ /// implement the `AstNode` trait.
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = LiteralExpr> {
+ syntax.children().filter_map(Self::cast)
 }
 }
 
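Note: because `Expr::cast` defers to `LiteralExpr` first, narrowing all the way to a specific literal is a two-step chain through the accessors above. A hedged sketch (the helper function name is hypothetical):

    // Borrow the inner string literal of an expression, if that is
    // what the expression holds.
    fn as_string_literal(expr: &Expr) -> Option<&LiteralString> {
        expr.as_literal()?.as_string()
    }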
@@ -1973,14 +2235,14 @@ impl Placeholder {
 .expect("should have a placeholder open token")
 }
 
- /// Gets the option for the placeholder.
- pub fn option(&self) -> Option<PlaceholderOption> {
- child(&self.0)
+ /// Gets the options for the placeholder.
+ pub fn options(&self) -> impl Iterator<Item = PlaceholderOption> {
+ PlaceholderOption::children(&self.0)
 }
 
 /// Gets the placeholder expression.
 pub fn expr(&self) -> Expr {
- child(&self.0).expect("placeholder should have an expression")
+ Expr::child(&self.0).expect("placeholder should have an expression")
 }
 }
 
@@ -2023,6 +2285,73 @@ pub enum PlaceholderOption {
 }
 
 impl PlaceholderOption {
+ /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+ /// underlying members within the [`PlaceholderOption`].
+ pub fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(
+ kind,
+ SyntaxKind::PlaceholderSepOptionNode
+ | SyntaxKind::PlaceholderDefaultOptionNode
+ | SyntaxKind::PlaceholderTrueFalseOptionNode
+ )
+ }
+
+ /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+ /// within the [`PlaceholderOption`].
+ pub fn cast(syntax: SyntaxNode) -> Option<Self>
+ where
+ Self: Sized,
+ {
+ match syntax.kind() {
+ SyntaxKind::PlaceholderSepOptionNode => Some(Self::Sep(
+ SepOption::cast(syntax).expect("separator option to cast"),
+ )),
+ SyntaxKind::PlaceholderDefaultOptionNode => Some(Self::Default(
+ DefaultOption::cast(syntax).expect("default option to cast"),
+ )),
+ SyntaxKind::PlaceholderTrueFalseOptionNode => Some(Self::TrueFalse(
+ TrueFalseOption::cast(syntax).expect("true false option to cast"),
+ )),
+ _ => None,
+ }
+ }
+
+ /// Gets a reference to the underlying [`SyntaxNode`].
+ pub fn syntax(&self) -> &SyntaxNode {
+ match self {
+ Self::Sep(element) => element.syntax(),
+ Self::Default(element) => element.syntax(),
+ Self::TrueFalse(element) => element.syntax(),
+ }
+ }
+
+ /// Attempts to get a reference to the inner [`SepOption`].
+ ///
+ /// * If `self` is a [`PlaceholderOption::Sep`], then a reference to the
+ /// inner [`SepOption`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
+ pub fn as_sep(&self) -> Option<&SepOption> {
+ match self {
+ Self::Sep(sep) => Some(sep),
+ _ => None,
+ }
+ }
+
+ /// Consumes `self` and attempts to return the inner [`SepOption`].
+ ///
+ /// * If `self` is a [`PlaceholderOption::Sep`], then the inner
+ /// [`SepOption`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
+ pub fn into_sep(self) -> Option<SepOption> {
+ match self {
+ Self::Sep(sep) => Some(sep),
+ _ => None,
+ }
+ }
+
+ /// Unwraps the option into a separator option.
 ///
 /// # Panics
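Note: since `Placeholder::option` became `options` and now yields an iterator, callers that want one particular option filter for it. A hedged sketch of recovering the old single-option behavior for `sep` (the `placeholder` variable is assumed to come from a parsed document):

    // First `sep` option attached to the placeholder, if any.
    let sep: Option<SepOption> = placeholder
        .options()
        .find_map(PlaceholderOption::into_sep);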
/// /// # Panics @@ -2035,6 +2364,30 @@ impl PlaceholderOption { } } + /// Attempts to get a reference to the inner [`DefaultOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Default`], then a reference to the + /// inner [`DefaultOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_default(&self) -> Option<&DefaultOption> { + match self { + Self::Default(default) => Some(default), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`DefaultOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Default`], then the inner + /// [`DefaultOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_default(self) -> Option { + match self { + Self::Default(default) => Some(default), + _ => None, + } + } + /// Unwraps the option into a default option. /// /// # Panics @@ -2047,6 +2400,30 @@ impl PlaceholderOption { } } + /// Attempts to get a reference to the inner [`TrueFalseOption`]. + /// + /// * If `self` is a [`PlaceholderOption::TrueFalse`], then a reference to + /// the inner [`TrueFalseOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_true_false(&self) -> Option<&TrueFalseOption> { + match self { + Self::TrueFalse(true_false) => Some(true_false), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TrueFalseOption`]. + /// + /// * If `self` is a [`PlaceholderOption::TrueFalse`], then the inner + /// [`TrueFalseOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_true_false(self) -> Option { + match self { + Self::TrueFalse(true_false) => Some(true_false), + _ => None, + } + } + /// Unwraps the option into a true/false option. /// /// # Panics @@ -2058,6 +2435,24 @@ impl PlaceholderOption { _ => panic!("not a true/false option"), } } + + /// Finds the first child that can be cast to an [`PlaceholderOption`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`PlaceholderOption`] + /// to implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`PlaceholderOption`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring + /// [`PlaceholderOption`] to implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } impl AstNode for PlaceholderOption { @@ -2249,8 +2644,8 @@ pub struct LiteralArray(SyntaxNode); impl LiteralArray { /// Gets the elements of the literal array. - pub fn elements(&self) -> AstChildren { - children(&self.0) + pub fn elements(&self) -> impl Iterator { + Expr::children(&self.0) } } @@ -2286,7 +2681,7 @@ pub struct LiteralPair(SyntaxNode); impl LiteralPair { /// Gets the first and second expressions in the literal pair. pub fn exprs(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let first = children .next() .expect("pair should have a first expression"); @@ -2365,7 +2760,7 @@ pub struct LiteralMapItem(SyntaxNode); impl LiteralMapItem { /// Gets the key and the value of the item. 
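> Editor's note: the `option()` to `options()` change shown earlier in this hunk turns placeholder options into an iterator, which composes with the accessors above. A sketch, assuming a parsed `Placeholder` (the helper name and the `SepOption` import path are assumptions):

```rust
use wdl_ast::v1::{Placeholder, PlaceholderOption, SepOption};

/// Hypothetical helper: find the `sep` option of a placeholder, if any.
fn sep_option(placeholder: &Placeholder) -> Option<SepOption> {
    placeholder.options().find_map(PlaceholderOption::into_sep)
}
```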
pub fn key_value(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let key = children.next().expect("expected a key expression"); let value = children.next().expect("expected a value expression"); (key, value) @@ -2435,25 +2830,9 @@ impl AstNode for LiteralObject { /// Gets the name and value of a object or struct literal item. fn name_value(parent: &SyntaxNode) -> (Ident, Expr) { - let mut children = parent - .children_with_tokens() - .filter(|c| Ident::can_cast(c.kind()) || Expr::can_cast(c.kind())); - let key = Ident::cast( - children - .next() - .expect("expected a key token") - .into_token() - .expect("key should be a token"), - ) - .expect("token should cast to ident"); - let value = Expr::cast( - children - .next() - .expect("there should be a value expression") - .into_node() - .expect("value should be a node"), - ) - .expect("node should cast to an expression"); + let key = token_child::(parent).expect("expected a key token"); + let value = Expr::child(parent).expect("expected a value expression"); + (key, value) } @@ -2647,7 +3026,7 @@ impl LiteralHintsItem { /// Gets the expression of the hints item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -2729,7 +3108,7 @@ impl LiteralInputItem { /// Gets the expression of the input item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -2811,7 +3190,7 @@ impl LiteralOutputItem { /// Gets the expression of the output item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -2883,7 +3262,7 @@ pub struct ParenthesizedExpr(SyntaxNode); impl ParenthesizedExpr { /// Gets the inner expression. pub fn inner(&self) -> Expr { - child(&self.0).expect("expected an inner expression") + Expr::child(&self.0).expect("expected an inner expression") } } @@ -2923,7 +3302,7 @@ impl IfExpr { /// The second expression is the `true` expression. /// The third expression is the `false` expression. pub fn exprs(&self) -> (Expr, Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let conditional = children .next() .expect("should have a conditional expression"); @@ -2968,7 +3347,7 @@ macro_rules! prefix_expression { impl $name { /// Gets the operand expression. pub fn operand(&self) -> Expr { - child(&self.0).expect("expected an operand expression") + Expr::child(&self.0).expect("expected an operand expression") } } @@ -3009,7 +3388,7 @@ macro_rules! infix_expression { impl $name { /// Gets the operands of the expression. pub fn operands(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let lhs = children.next().expect("expected a lhs expression"); let rhs = children.next().expect("expected a rhs expression"); (lhs, rhs) @@ -3076,7 +3455,7 @@ impl CallExpr { /// Gets the call arguments. pub fn arguments(&self) -> impl Iterator { - children(&self.0) + Expr::children(&self.0) } } @@ -3115,7 +3494,7 @@ impl IndexExpr { /// The first is the operand expression. /// The second is the index expression. 
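> Editor's note: the tuple accessors in this hunk (`key_value`, `exprs`, `operands`) all rely on `Expr::children` yielding expression children in source order. A sketch of what that guarantees for an `if` expression, assuming a parsed `IfExpr`:

```rust
use wdl_ast::v1::IfExpr;

/// Hypothetical helper: pull apart `if c then t else f` in source order.
fn if_parts(expr: &IfExpr) {
    // The first child is the condition, then the `true` and `false` arms.
    let (condition, true_arm, false_arm) = expr.exprs();
    let _ = (condition, true_arm, false_arm);
}
```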
pub fn operands(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let operand = children.next().expect("expected an operand expression"); let index = children.next().expect("expected an index expression"); (operand, index) @@ -3157,7 +3536,7 @@ impl AccessExpr { /// The first is the operand expression. /// The second is the member name. pub fn operands(&self) -> (Expr, Ident) { - let operand = child(&self.0).expect("expected an operand expression"); + let operand = Expr::child(&self.0).expect("expected an operand expression"); let name = Ident::cast(self.0.last_token().expect("expected a last token")) .expect("expected an ident token"); (operand, name) diff --git a/wdl-ast/src/v1/import.rs b/wdl-ast/src/v1/import.rs index f42d67935..5d54f90d8 100644 --- a/wdl-ast/src/v1/import.rs +++ b/wdl-ast/src/v1/import.rs @@ -7,6 +7,9 @@ use url::Url; use wdl_grammar::lexer::v1::Logos; use wdl_grammar::lexer::v1::Token; +use super::AliasKeyword; +use super::AsKeyword; +use super::ImportKeyword; use super::LiteralString; use crate::AstChildren; use crate::AstNode; @@ -32,6 +35,11 @@ impl ImportStatement { child(&self.0).expect("import should have a URI") } + /// Gets the `import`` keyword of the import statement. + pub fn keyword(&self) -> ImportKeyword { + token(&self.0).expect("import should have a keyword") + } + /// Gets the explicit namespace of the import statement (i.e. the `as` /// clause). pub fn explicit_namespace(&self) -> Option { @@ -129,6 +137,16 @@ impl ImportAlias { let target = children.next().expect("expected a target identifier"); (source, target) } + + /// Gets the `alias` keyword of the alias. + pub fn alias_keyword(&self) -> AliasKeyword { + token(&self.0).expect("alias should have an `alias` keyword") + } + + /// Gets the `as` keyword of the alias. + pub fn as_keyword(&self) -> AsKeyword { + token(&self.0).expect("alias should have an `as` keyword") + } } impl AstNode for ImportAlias { diff --git a/wdl-ast/src/v1/struct.rs b/wdl-ast/src/v1/struct.rs index 6e53ecc49..c4eda27f4 100644 --- a/wdl-ast/src/v1/struct.rs +++ b/wdl-ast/src/v1/struct.rs @@ -2,6 +2,7 @@ use super::MetadataSection; use super::ParameterMetadataSection; +use super::StructKeyword; use super::UnboundDecl; use crate::AstChildren; use crate::AstNode; @@ -22,9 +23,14 @@ impl StructDefinition { token(&self.0).expect("struct should have a name") } + /// Gets the `struct` keyword of the struct definition. + pub fn keyword(&self) -> StructKeyword { + token(&self.0).expect("struct should have a keyword") + } + /// Gets the items in the struct definition. - pub fn items(&self) -> AstChildren { - children(&self.0) + pub fn items(&self) -> impl Iterator { + StructItem::children(&self.0) } /// Gets the member declarations of the struct. @@ -79,10 +85,10 @@ pub enum StructItem { ParameterMetadata(ParameterMetadataSection), } -impl AstNode for StructItem { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool +impl StructItem { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`StructItem`]. + pub fn can_cast(kind: SyntaxKind) -> bool where Self: Sized, { @@ -94,27 +100,126 @@ impl AstNode for StructItem { ) } - fn cast(syntax: SyntaxNode) -> Option + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`StructItem`]. 
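> Editor's note: the new keyword accessors (`keyword`, `alias_keyword`, `as_keyword`) exist so the formatter can emit the token that is actually in the tree instead of re-synthesizing it. A sketch, assuming a parsed `ImportStatement`; the helper name is illustrative:

```rust
use wdl_ast::v1::ImportStatement;

/// Hypothetical helper: render the leading keyword of an import statement.
fn import_keyword_text(stmt: &ImportStatement) -> String {
    // Each token type implements `Display` with its canonical text
    // (see the new `wdl-ast/src/v1/tokens.rs` below).
    stmt.keyword().to_string()
}
```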
+ pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::UnboundDeclNode => Some(Self::Member(UnboundDecl(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } + SyntaxKind::UnboundDeclNode => Some(Self::Member( + UnboundDecl::cast(syntax).expect("unbound decl to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Member(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`UnboundDecl`]. + /// + /// * If `self` is a [`StructItem::Member`], then a reference to the inner + /// [`UnboundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_unbound_decl(&self) -> Option<&UnboundDecl> { + match self { + Self::Member(unbound_decl) => Some(unbound_decl), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`UnboundDecl`]. + /// + /// * If `self` is a [`StructItem::Member`], then the inner [`UnboundDecl`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_unbound_decl(self) -> Option { + match self { + Self::Member(unbound_decl) => Some(unbound_decl), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`MetadataSection`]. + /// + /// * If `self` is a [`StructItem::Metadata`], then a reference to the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_metadata_section(&self) -> Option<&MetadataSection> { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`MetadataSection`]. + /// + /// * If `self` is a [`StructItem::Metadata`], then the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_metadata_section(self) -> Option { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`StructItem::ParameterMetadata`], then a reference to + /// the inner [`ParameterMetadataSection`] is returned wrapped in + /// [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> { match self { - Self::Member(m) => &m.0, - Self::Metadata(m) => &m.0, - Self::ParameterMetadata(m) => &m.0, + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`StructItem::ParameterMetadata`], then the inner + /// [`ParameterMetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
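> Editor's note: with `StructItem` now carrying its own `cast` and accessors, filtering struct items no longer needs the `AstNode` machinery. A sketch, assuming a parsed `StructDefinition` (collecting into a `Vec` just to keep lifetimes simple):

```rust
use wdl_ast::v1::{StructDefinition, StructItem, UnboundDecl};

/// Hypothetical helper: gather the member declarations of a struct.
fn member_decls(def: &StructDefinition) -> Vec<UnboundDecl> {
    def.items()
        .filter_map(StructItem::into_unbound_decl)
        .collect()
}
```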
+ pub fn into_parameter_metadata_section(self) -> Option { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, } } + + /// Finds the first child that can be cast to an [`StructItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`StructItem`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`StructItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`StructItem`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } #[cfg(test)] @@ -179,7 +284,7 @@ struct ComplexTypes { parameter_meta { a: "foo" } -} +} "#, ); assert!(diagnostics.is_empty()); diff --git a/wdl-ast/src/v1/task.rs b/wdl-ast/src/v1/task.rs index bccc14528..77088e27b 100644 --- a/wdl-ast/src/v1/task.rs +++ b/wdl-ast/src/v1/task.rs @@ -23,6 +23,7 @@ use crate::support; use crate::support::child; use crate::support::children; use crate::token; +use crate::v1::WorkflowHintsSection; pub mod common; pub mod requirements; @@ -39,8 +40,8 @@ impl TaskDefinition { } /// Gets the items of the task. - pub fn items(&self) -> AstChildren { - children(&self.0) + pub fn items(&self) -> impl Iterator { + TaskItem::children(&self.0) } /// Gets the input section of the task. @@ -125,8 +126,10 @@ pub enum TaskItem { Command(CommandSection), /// The item is a requirements section. Requirements(RequirementsSection), - /// The item is a hints section. - Hints(TaskHintsSection), + /// The item is a task hints section. + TaskHints(TaskHintsSection), + /// The item is a workflow hints section. + WorkflowHints(WorkflowHintsSection), /// The item is a runtime section. Runtime(RuntimeSection), /// The item is a metadata section. @@ -137,10 +140,10 @@ pub enum TaskItem { Declaration(BoundDecl), } -impl AstNode for TaskItem { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool +impl TaskItem { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`TaskItem`]. + pub fn can_cast(kind: SyntaxKind) -> bool where Self: Sized, { @@ -158,41 +161,324 @@ impl AstNode for TaskItem { ) } - fn cast(syntax: SyntaxNode) -> Option + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`TaskItem`]. 
+ pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::InputSectionNode => Some(Self::Input(InputSection(syntax))), - SyntaxKind::OutputSectionNode => Some(Self::Output(OutputSection(syntax))), - SyntaxKind::CommandSectionNode => Some(Self::Command(CommandSection(syntax))), - SyntaxKind::RequirementsSectionNode => { - Some(Self::Requirements(RequirementsSection(syntax))) - } - SyntaxKind::TaskHintsSectionNode => Some(Self::Hints(TaskHintsSection(syntax))), - SyntaxKind::RuntimeSectionNode => Some(Self::Runtime(RuntimeSection(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } - SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))), + SyntaxKind::InputSectionNode => Some(Self::Input( + InputSection::cast(syntax).expect("input section to cast"), + )), + SyntaxKind::OutputSectionNode => Some(Self::Output( + OutputSection::cast(syntax).expect("output section to cast"), + )), + SyntaxKind::CommandSectionNode => Some(Self::Command( + CommandSection::cast(syntax).expect("command section to cast"), + )), + SyntaxKind::RequirementsSectionNode => Some(Self::Requirements( + RequirementsSection::cast(syntax).expect("requirements section to cast"), + )), + SyntaxKind::RuntimeSectionNode => Some(Self::Runtime( + RuntimeSection::cast(syntax).expect("runtime section to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), + SyntaxKind::TaskHintsSectionNode => Some(Self::TaskHints( + TaskHintsSection::cast(syntax).expect("task hints section to cast"), + )), + SyntaxKind::WorkflowHintsSectionNode => Some(Self::WorkflowHints( + WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"), + )), + SyntaxKind::BoundDeclNode => Some(Self::Declaration( + BoundDecl::cast(syntax).expect("bound decl to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Input(element) => element.syntax(), + Self::Output(element) => element.syntax(), + Self::Command(element) => element.syntax(), + Self::Requirements(element) => element.syntax(), + Self::TaskHints(element) => element.syntax(), + Self::WorkflowHints(element) => element.syntax(), + Self::Runtime(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + Self::Declaration(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`InputSection`]. + /// + /// * If `self` is a [`TaskItem::Input`], then a reference to the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_input_section(&self) -> Option<&InputSection> { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`InputSection`]. + /// + /// * If `self` is a [`TaskItem::Input`], then the inner [`InputSection`] is + /// returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
+ pub fn into_input_section(self) -> Option { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`OutputSection`]. + /// + /// * If `self` is a [`TaskItem::Output`], then a reference to the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_output_section(&self) -> Option<&OutputSection> { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`OutputSection`]. + /// + /// * If `self` is a [`TaskItem::Output`], then the inner [`OutputSection`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_output_section(self) -> Option { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`CommandSection`]. + /// + /// * If `self` is a [`TaskItem::Command`], then a reference to the inner + /// [`CommandSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_command_section(&self) -> Option<&CommandSection> { + match self { + Self::Command(command_section) => Some(command_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`CommandSection`]. + /// + /// * If `self` is a [`TaskItem::Command`], then the inner + /// [`CommandSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_command_section(self) -> Option { + match self { + Self::Command(command_section) => Some(command_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`RequirementsSection`]. + /// + /// * If `self` is a [`TaskItem::Requirements`], then a reference to the + /// inner [`RequirementsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_requirements_section(&self) -> Option<&RequirementsSection> { + match self { + Self::Requirements(requirements_section) => Some(requirements_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`RequirementsSection`]. + /// + /// * If `self` is a [`TaskItem::Requirements`], then the inner + /// [`RequirementsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_requirements_section(self) -> Option { match self { - Self::Input(i) => &i.0, - Self::Output(o) => &o.0, - Self::Command(c) => &c.0, - Self::Requirements(r) => &r.0, - Self::Hints(h) => &h.0, - Self::Runtime(r) => &r.0, - Self::Metadata(m) => &m.0, - Self::ParameterMetadata(m) => &m.0, - Self::Declaration(d) => &d.0, + Self::Requirements(requirements_section) => Some(requirements_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`TaskHintsSection`]. + /// + /// * If `self` is a [`TaskItem::TaskHints`], then a reference to the inner + /// [`TaskHintsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_task_hints_section(&self) -> Option<&TaskHintsSection> { + match self { + Self::TaskHints(task_hints_section) => Some(task_hints_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TaskHintsSection`]. + /// + /// * If `self` is a [`TaskItem::TaskHints`], then the inner + /// [`TaskHintsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
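> Editor's note: these `into_*` accessors compose directly with the iterator now returned by `TaskDefinition::items`, so section lookups reduce to a `find_map`. A sketch, assuming a parsed `TaskDefinition`:

```rust
use wdl_ast::v1::{CommandSection, TaskDefinition, TaskItem};

/// Hypothetical helper: locate a task's command section, if present.
fn command_section(task: &TaskDefinition) -> Option<CommandSection> {
    task.items().find_map(TaskItem::into_command_section)
}
```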
+    pub fn into_task_hints_section(self) -> Option<TaskHintsSection> {
+        match self {
+            Self::TaskHints(task_hints_section) => Some(task_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`WorkflowHintsSection`].
+    ///
+    /// * If `self` is a [`TaskItem::WorkflowHints`], then a reference to
+    ///   the inner [`WorkflowHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_workflow_hints_section(&self) -> Option<&WorkflowHintsSection> {
+        match self {
+            Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner
+    /// [`WorkflowHintsSection`].
+    ///
+    /// * If `self` is a [`TaskItem::WorkflowHints`], then the inner
+    ///   [`WorkflowHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_workflow_hints_section(self) -> Option<WorkflowHintsSection> {
+        match self {
+            Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`RuntimeSection`].
+    ///
+    /// * If `self` is a [`TaskItem::Runtime`], then a reference to the inner
+    ///   [`RuntimeSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_runtime_section(&self) -> Option<&RuntimeSection> {
+        match self {
+            Self::Runtime(runtime_section) => Some(runtime_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`RuntimeSection`].
+    ///
+    /// * If `self` is a [`TaskItem::Runtime`], then the inner
+    ///   [`RuntimeSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_runtime_section(self) -> Option<RuntimeSection> {
+        match self {
+            Self::Runtime(runtime_section) => Some(runtime_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`MetadataSection`].
+    ///
+    /// * If `self` is a [`TaskItem::Metadata`], then a reference to the inner
+    ///   [`MetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_metadata_section(&self) -> Option<&MetadataSection> {
+        match self {
+            Self::Metadata(metadata_section) => Some(metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`MetadataSection`].
+    ///
+    /// * If `self` is a [`TaskItem::Metadata`], then the inner
+    ///   [`MetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_metadata_section(self) -> Option<MetadataSection> {
+        match self {
+            Self::Metadata(metadata_section) => Some(metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`ParameterMetadataSection`].
+    ///
+    /// * If `self` is a [`TaskItem::ParameterMetadata`], then a reference to
+    ///   the inner [`ParameterMetadataSection`] is returned wrapped in
+    ///   [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> {
+        match self {
+            Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section),
+            _ => None,
         }
     }
+
+    /// Consumes `self` and attempts to return the inner
+    /// [`ParameterMetadataSection`].
+    ///
+    /// * If `self` is a [`TaskItem::ParameterMetadata`], then the inner
+    ///   [`ParameterMetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+ pub fn into_parameter_metadata_section(self) -> Option { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`BoundDecl`]. + /// + /// * If `self` is a [`TaskItem::Declaration`], then a reference to the + /// inner [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_declaration(&self) -> Option<&BoundDecl> { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`BoundDecl`]. + /// + /// * If `self` is a [`TaskItem::Declaration`], then the inner [`BoundDecl`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_declaration(self) -> Option { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Finds the first child that can be cast to an [`TaskItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`TaskItem`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`TaskItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`TaskItem`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } /// Represents the parent of a section. @@ -207,6 +493,49 @@ pub enum SectionParent { } impl SectionParent { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`SectionParent`]. + pub fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!( + kind, + SyntaxKind::TaskDefinitionNode + | SyntaxKind::WorkflowDefinitionNode + | SyntaxKind::StructDefinitionNode + ) + } + + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`SectionParent`]. + pub fn cast(syntax: SyntaxNode) -> Option + where + Self: Sized, + { + match syntax.kind() { + SyntaxKind::TaskDefinitionNode => Some(Self::Task( + TaskDefinition::cast(syntax).expect("task definition to cast"), + )), + SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow( + WorkflowDefinition::cast(syntax).expect("workflow definition to cast"), + )), + SyntaxKind::StructDefinitionNode => Some(Self::Struct( + StructDefinition::cast(syntax).expect("struct definition to cast"), + )), + _ => None, + } + } + + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Task(element) => element.syntax(), + Self::Workflow(element) => element.syntax(), + Self::Struct(element) => element.syntax(), + } + } + /// Gets the name of the section parent. pub fn name(&self) -> Ident { match self { @@ -216,6 +545,30 @@ impl SectionParent { } } + /// Attempts to get a reference to the inner [`TaskDefinition`]. + /// + /// * If `self` is a [`SectionParent::Task`], then a reference to the inner + /// [`TaskDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_task(&self) -> Option<&TaskDefinition> { + match self { + Self::Task(task) => Some(task), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TaskDefinition`]. 
+ /// + /// * If `self` is a [`SectionParent::Task`], then the inner + /// [`TaskDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_task(self) -> Option { + match self { + Self::Task(task) => Some(task), + _ => None, + } + } + /// Unwraps to a task definition. /// /// # Panics @@ -228,6 +581,30 @@ impl SectionParent { } } + /// Attempts to get a reference to the inner [`WorkflowDefinition`]. + /// + /// * If `self` is a [`SectionParent::Workflow`], then a reference to the + /// inner [`WorkflowDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_workflow(&self) -> Option<&WorkflowDefinition> { + match self { + Self::Workflow(workflow) => Some(workflow), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`WorkflowDefinition`]. + /// + /// * If `self` is a [`SectionParent::Workflow`], then the inner + /// [`WorkflowDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_workflow(self) -> Option { + match self { + Self::Workflow(workflow) => Some(workflow), + _ => None, + } + } + /// Unwraps to a workflow definition. /// /// # Panics @@ -240,6 +617,30 @@ impl SectionParent { } } + /// Attempts to get a reference to the inner [`StructDefinition`]. + /// + /// * If `self` is a [`SectionParent::Struct`], then a reference to the + /// inner [`StructDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_struct(&self) -> Option<&StructDefinition> { + match self { + Self::Struct(r#struct) => Some(r#struct), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`StructDefinition`]. + /// + /// * If `self` is a [`SectionParent::Struct`], then the inner + /// [`StructDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_struct(self) -> Option { + match self { + Self::Struct(r#struct) => Some(r#struct), + _ => None, + } + } + /// Unwraps to a struct definition. /// /// # Panics @@ -251,41 +652,23 @@ impl SectionParent { _ => panic!("not a struct definition"), } } -} - -impl AstNode for SectionParent { - type Language = WorkflowDescriptionLanguage; - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!( - kind, - SyntaxKind::TaskDefinitionNode - | SyntaxKind::WorkflowDefinitionNode - | SyntaxKind::StructDefinitionNode - ) - } - - fn cast(node: SyntaxNode) -> Option - where - Self: Sized, - { - match node.kind() { - SyntaxKind::TaskDefinitionNode => Some(Self::Task(TaskDefinition(node))), - SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(WorkflowDefinition(node))), - SyntaxKind::StructDefinitionNode => Some(Self::Struct(StructDefinition(node))), - _ => None, - } + /// Finds the first child that can be cast to an [`SectionParent`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`SectionParent`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) } - fn syntax(&self) -> &SyntaxNode { - match self { - Self::Task(t) => &t.0, - Self::Workflow(w) => &w.0, - Self::Struct(s) => &s.0, - } + /// Finds all children that can be cast to an [`SectionParent`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`SectionParent`] to + /// implement the `AstNode` trait. 
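> Editor's note: `SectionParent` gets the same treatment, so code that previously had to call the panicking `unwrap_task` can branch safely. A sketch, assuming a `SectionParent` obtained from one of the `parent()` accessors in this file; the helper name is illustrative:

```rust
use wdl_ast::v1::SectionParent;

/// Hypothetical helper: does this section belong to a task with this name?
fn is_task_named(parent: &SectionParent, expected: &str) -> bool {
    parent.as_task().is_some() && parent.name().syntax().text() == expected
}
```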
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) } } @@ -295,8 +678,8 @@ pub struct InputSection(pub(crate) SyntaxNode); impl InputSection { /// Gets the declarations of the input section. - pub fn declarations(&self) -> AstChildren { - children(&self.0) + pub fn declarations(&self) -> impl Iterator { + Decl::children(&self.0) } /// Gets the parent of the input section. @@ -388,12 +771,11 @@ impl CommandSection { self.0.children_with_tokens().filter_map(CommandPart::cast) } - /// Gets the command text if the command is not interpolated (i.e. - /// has no placeholders). + /// Gets the command text if the command is not interpolated (i.e. has no + /// placeholders). /// - /// Returns `None` if the command is interpolated, as - /// interpolated commands cannot be represented as a single - /// span of text. + /// Returns `None` if the command is interpolated, as interpolated commands + /// cannot be represented as a single span of text. pub fn text(&self) -> Option { let mut parts = self.parts(); if let Some(CommandPart::Text(text)) = parts.next() { @@ -569,7 +951,7 @@ impl RequirementsItem { /// Gets the expression of the requirements item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } /// Consumes `self` and attempts to cast the requirements item to a @@ -658,7 +1040,7 @@ impl TaskHintsItem { /// Gets the expression of the hints item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -749,7 +1131,7 @@ impl RuntimeItem { /// Gets the expression of the runtime item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } /// Consumes `self` and attempts to cast the runtime item to a diff --git a/wdl-ast/src/v1/task/common/container/value.rs b/wdl-ast/src/v1/task/common/container/value.rs index 5e6a703b6..d4035138b 100644 --- a/wdl-ast/src/v1/task/common/container/value.rs +++ b/wdl-ast/src/v1/task/common/container/value.rs @@ -3,8 +3,6 @@ use std::ops::Deref; -use rowan::ast::AstNode; - use crate::v1::Expr; use crate::v1::LiteralExpr; diff --git a/wdl-ast/src/v1/tokens.rs b/wdl-ast/src/v1/tokens.rs new file mode 100644 index 000000000..798b603a5 --- /dev/null +++ b/wdl-ast/src/v1/tokens.rs @@ -0,0 +1,2089 @@ +//! V1 AST tokens. + +use crate::AstToken; +use crate::SyntaxKind; +use crate::SyntaxToken; + +/// A token representing the `after` keyword. +#[derive(Clone, Debug)] +pub struct AfterKeyword(SyntaxToken); + +impl AstToken for AfterKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AfterKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option + where + Self: Sized, + { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AfterKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "after") + } +} + +/// A token representing the `alias` keyword. 
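> Editor's note: since `CommandSection::text` only returns a value for commands with no placeholders, it doubles as a cheap interpolation check. A sketch, assuming a parsed `CommandSection`:

```rust
use wdl_ast::v1::CommandSection;

/// Hypothetical helper: true when the command is a single static span.
fn is_static_command(section: &CommandSection) -> bool {
    section.text().is_some()
}
```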
+#[derive(Clone, Debug)] +pub struct AliasKeyword(SyntaxToken); + +impl AstToken for AliasKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AliasKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AliasKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "alias") + } +} + +/// A token representing the `Array` type keyword. +#[derive(Clone, Debug)] +pub struct ArrayTypeKeyword(SyntaxToken); + +impl AstToken for ArrayTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ArrayTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ArrayTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Array") + } +} + +/// A token representing the `as` keyword. +#[derive(Clone, Debug)] +pub struct AsKeyword(SyntaxToken); + +impl AstToken for AsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "as") + } +} + +/// A token representing the `=` symbol. +#[derive(Clone, Debug)] +pub struct Assignment(SyntaxToken); + +impl AstToken for Assignment { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Assignment) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Assignment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "=") + } +} + +/// A token representing the `*` symbol. +#[derive(Clone, Debug)] +pub struct Asterisk(SyntaxToken); + +impl AstToken for Asterisk { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Asterisk) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Asterisk { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "*") + } +} + +/// A token representing the `Boolean` keyword. +#[derive(Clone, Debug)] +pub struct BooleanTypeKeyword(SyntaxToken); + +impl AstToken for BooleanTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::BooleanTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for BooleanTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Boolean") + } +} + +/// A token representing the `call` keyword. 
+#[derive(Clone, Debug)] +pub struct CallKeyword(SyntaxToken); + +impl AstToken for CallKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CallKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CallKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "call") + } +} + +/// A token representing the `}` symbol. +#[derive(Clone, Debug)] +pub struct CloseBrace(SyntaxToken); + +impl AstToken for CloseBrace { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBrace) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "}}") + } +} + +/// A token representing the `]` symbol. +#[derive(Clone, Debug)] +pub struct CloseBracket(SyntaxToken); + +impl AstToken for CloseBracket { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBracket) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBracket { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "]") + } +} + +/// A token representing the `>>>` token. +#[derive(Clone, Debug)] +pub struct CloseHeredoc(SyntaxToken); + +impl AstToken for CloseHeredoc { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseHeredoc) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseHeredoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">>>") + } +} + +/// A token representing the `)` symbol. +#[derive(Clone, Debug)] +pub struct CloseParen(SyntaxToken); + +impl AstToken for CloseParen { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseParen) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseParen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ")") + } +} + +/// A token representing the `:` symbol. +#[derive(Clone, Debug)] +pub struct Colon(SyntaxToken); + +impl AstToken for Colon { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Colon) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Colon { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ":") + } +} + +/// A token representing the `,` symbol. 
+#[derive(Clone, Debug)] +pub struct Comma(SyntaxToken); + +impl AstToken for Comma { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Comma) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Comma { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ",") + } +} + +/// A token representing the `command` keyword. +#[derive(Clone, Debug)] +pub struct CommandKeyword(SyntaxToken); + +impl AstToken for CommandKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CommandKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CommandKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "command") + } +} + +/// A token representing the `Directory` type keyword. +#[derive(Clone, Debug)] +pub struct DirectoryTypeKeyword(SyntaxToken); + +impl AstToken for DirectoryTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::DirectoryTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for DirectoryTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Directory") + } +} + +/// A token representing the `.` symbol. +#[derive(Clone, Debug)] +pub struct Dot(SyntaxToken); + +impl AstToken for Dot { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Dot) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Dot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ".") + } +} + +/// A token representing the `"` symbol. +#[derive(Clone, Debug)] +pub struct DoubleQuote(SyntaxToken); + +impl AstToken for DoubleQuote { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::DoubleQuote) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for DoubleQuote { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, r#"""#) + } +} + +/// A token representing the `else` keyword. +#[derive(Clone, Debug)] +pub struct ElseKeyword(SyntaxToken); + +impl AstToken for ElseKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ElseKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ElseKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "else") + } +} + +/// A token representing the `==` symbol. 
+#[derive(Clone, Debug)]
+pub struct Equal(SyntaxToken);
+
+impl AstToken for Equal {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Equal)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Equal {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "==")
+    }
+}
+
+/// A token representing the `!` symbol.
+#[derive(Clone, Debug)]
+pub struct Exclamation(SyntaxToken);
+
+impl AstToken for Exclamation {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Exclamation)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Exclamation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "!")
+    }
+}
+
+/// A token representing the `**` symbol.
+#[derive(Clone, Debug)]
+pub struct Exponentiation(SyntaxToken);
+
+impl AstToken for Exponentiation {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Exponentiation)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Exponentiation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "**")
+    }
+}
+
+/// A token representing the `false` keyword.
+#[derive(Clone, Debug)]
+pub struct FalseKeyword(SyntaxToken);
+
+impl AstToken for FalseKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::FalseKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for FalseKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "false")
+    }
+}
+
+/// A token representing the `File` type keyword.
+#[derive(Clone, Debug)]
+pub struct FileTypeKeyword(SyntaxToken);
+
+impl AstToken for FileTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::FileTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for FileTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "File")
+    }
+}
+
+/// A token representing the `Float` type keyword.
+#[derive(Clone, Debug)]
+pub struct FloatTypeKeyword(SyntaxToken);
+
+impl AstToken for FloatTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::FloatTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for FloatTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Float")
+    }
+}
+
+/// A token representing the `>` symbol.
+#[derive(Clone, Debug)] +pub struct Greater(SyntaxToken); + +impl AstToken for Greater { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Greater) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Greater { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">") + } +} + +/// A token representing the `>=` symbol. +#[derive(Clone, Debug)] +pub struct GreaterEqual(SyntaxToken); + +impl AstToken for GreaterEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::GreaterEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for GreaterEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">=") + } +} + +/// A token representing the `hints` keyword. +#[derive(Clone, Debug)] +pub struct HintsKeyword(SyntaxToken); + +impl AstToken for HintsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::HintsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for HintsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "hints") + } +} + +/// A token representing the `if` keyword. +#[derive(Clone, Debug)] +pub struct IfKeyword(SyntaxToken); + +impl AstToken for IfKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IfKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IfKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "if") + } +} + +/// A token representing the `import` keyword. +#[derive(Clone, Debug)] +pub struct ImportKeyword(SyntaxToken); + +impl AstToken for ImportKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ImportKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ImportKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "import") + } +} + +/// A token representing the `in` keyword. +#[derive(Clone, Debug)] +pub struct InKeyword(SyntaxToken); + +impl AstToken for InKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "in") + } +} + +/// A token representing the `input` keyword. 
+#[derive(Clone, Debug)] +pub struct InputKeyword(SyntaxToken); + +impl AstToken for InputKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InputKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InputKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "input") + } +} + +/// A token representing the `Int` type keyword. +#[derive(Clone, Debug)] +pub struct IntTypeKeyword(SyntaxToken); + +impl AstToken for IntTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IntTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IntTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Int") + } +} + +/// A token representing the `<` symbol. +#[derive(Clone, Debug)] +pub struct Less(SyntaxToken); + +impl AstToken for Less { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Less) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Less { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<") + } +} + +/// A token representing the `<=` symbol. +#[derive(Clone, Debug)] +pub struct LessEqual(SyntaxToken); + +impl AstToken for LessEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LessEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LessEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<=") + } +} + +/// A token representing the `&&` symbol. +#[derive(Clone, Debug)] +pub struct LogicalAnd(SyntaxToken); + +impl AstToken for LogicalAnd { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalAnd) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalAnd { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "&&") + } +} + +/// A token representing the `||` symbol. +#[derive(Clone, Debug)] +pub struct LogicalOr(SyntaxToken); + +impl AstToken for LogicalOr { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalOr) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalOr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "||") + } +} + +/// A token representing the `Map` type keyword. 
+#[derive(Clone, Debug)] +pub struct MapTypeKeyword(SyntaxToken); + +impl AstToken for MapTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MapTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MapTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Map") + } +} + +/// A token representing the `meta` keyword. +#[derive(Clone, Debug)] +pub struct MetaKeyword(SyntaxToken); + +impl AstToken for MetaKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MetaKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MetaKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "meta") + } +} + +/// A token representing the `-` symbol. +#[derive(Clone, Debug)] +pub struct Minus(SyntaxToken); + +impl AstToken for Minus { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Minus) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Minus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "-") + } +} + +/// A token representing the `None` keyword. +#[derive(Clone, Debug)] +pub struct NoneKeyword(SyntaxToken); + +impl AstToken for NoneKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NoneKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NoneKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "None") + } +} + +/// A token representing the `!=` symbol. +#[derive(Clone, Debug)] +pub struct NotEqual(SyntaxToken); + +impl AstToken for NotEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NotEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NotEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!=") + } +} + +/// A token representing the `null` keyword. +#[derive(Clone, Debug)] +pub struct NullKeyword(SyntaxToken); + +impl AstToken for NullKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NullKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NullKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "null") + } +} + +/// A token representing the `object` keyword. 
+#[derive(Clone, Debug)]
+pub struct ObjectKeyword(SyntaxToken);
+
+impl AstToken for ObjectKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::ObjectKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for ObjectKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "object")
+    }
+}
+
+/// A token representing the `Object` type keyword.
+#[derive(Clone, Debug)]
+pub struct ObjectTypeKeyword(SyntaxToken);
+
+impl AstToken for ObjectTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::ObjectTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for ObjectTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Object")
+    }
+}
+
+/// A token representing the `{` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenBrace(SyntaxToken);
+
+impl AstToken for OpenBrace {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenBrace)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenBrace {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{{")
+    }
+}
+
+/// A token representing the `[` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenBracket(SyntaxToken);
+
+impl AstToken for OpenBracket {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenBracket)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenBracket {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "[")
+    }
+}
+
+/// A token representing the `<<<` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenHeredoc(SyntaxToken);
+
+impl AstToken for OpenHeredoc {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenHeredoc)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenHeredoc {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "<<<")
+    }
+}
+
+/// A token representing the `(` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenParen(SyntaxToken);
+
+impl AstToken for OpenParen {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenParen)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenParen {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "(")
+    }
+}
+
+/// A token representing the `output` keyword.
+#[derive(Clone, Debug)]
+pub struct OutputKeyword(SyntaxToken);
+
+impl AstToken for OutputKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OutputKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OutputKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "output")
+    }
+}
+
+/// A token representing the `Pair` type keyword.
+#[derive(Clone, Debug)]
+pub struct PairTypeKeyword(SyntaxToken);
+
+impl AstToken for PairTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::PairTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for PairTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Pair")
+    }
+}
+
+/// A token representing the `parameter_meta` keyword.
+#[derive(Clone, Debug)]
+pub struct ParameterMetaKeyword(SyntaxToken);
+
+impl AstToken for ParameterMetaKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::ParameterMetaKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for ParameterMetaKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "parameter_meta")
+    }
+}
+
+/// A token representing the `%` symbol.
+#[derive(Clone, Debug)]
+pub struct Percent(SyntaxToken);
+
+impl AstToken for Percent {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Percent)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Percent {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "%")
+    }
+}
+
+/// Represents one of the placeholder open symbols.
+#[derive(Clone, Debug)]
+pub struct PlaceholderOpen(SyntaxToken);
+
+impl AstToken for PlaceholderOpen {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::PlaceholderOpen)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for PlaceholderOpen {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // NOTE: this is deferred to the entire underlying string simply because
+        // we cannot know a priori what the captured text is.
+        write!(f, "{}", self.0)
+    }
+}
+
+/// A token representing the `+` symbol.
+#[derive(Clone, Debug)] +pub struct Plus(SyntaxToken); + +impl AstToken for Plus { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Plus) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Plus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "+") + } +} + +/// A token representing the `?` symbol. +#[derive(Clone, Debug)] +pub struct QuestionMark(SyntaxToken); + +impl AstToken for QuestionMark { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::QuestionMark) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for QuestionMark { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "?") + } +} + +/// A token representing the `requirements` keyword. +#[derive(Clone, Debug)] +pub struct RequirementsKeyword(SyntaxToken); + +impl AstToken for RequirementsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RequirementsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RequirementsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "requirements") + } +} + +/// A token representing the `runtime` keyword. +#[derive(Clone, Debug)] +pub struct RuntimeKeyword(SyntaxToken); + +impl AstToken for RuntimeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RuntimeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RuntimeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "runtime") + } +} + +/// A token representing the `scatter` keyword. +#[derive(Clone, Debug)] +pub struct ScatterKeyword(SyntaxToken); + +impl AstToken for ScatterKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ScatterKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ScatterKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "scatter") + } +} + +/// A token representing the `'` symbol. +#[derive(Clone, Debug)] +pub struct SingleQuote(SyntaxToken); + +impl AstToken for SingleQuote { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::SingleQuote) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for SingleQuote { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "'") + } +} + +/// A token representing the `/` symbol. 
+#[derive(Clone, Debug)] +pub struct Slash(SyntaxToken); + +impl AstToken for Slash { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Slash) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Slash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "/") + } +} + +/// A token representing the `String` type keyword. +#[derive(Clone, Debug)] +pub struct StringTypeKeyword(SyntaxToken); + +impl AstToken for StringTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StringTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StringTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "String") + } +} + +/// A token representing the `struct` keyword. +#[derive(Clone, Debug)] +pub struct StructKeyword(SyntaxToken); + +impl AstToken for StructKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StructKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StructKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "struct") + } +} + +/// A token representing the `task` keyword. +#[derive(Clone, Debug)] +pub struct TaskKeyword(SyntaxToken); + +impl AstToken for TaskKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TaskKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TaskKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "task") + } +} + +/// A token representing the `then` keyword. +#[derive(Clone, Debug)] +pub struct ThenKeyword(SyntaxToken); + +impl AstToken for ThenKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ThenKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ThenKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "then") + } +} + +/// A token representing the `true` keyword. +#[derive(Clone, Debug)] +pub struct TrueKeyword(SyntaxToken); + +impl AstToken for TrueKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TrueKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TrueKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "true") + } +} + +/// A token representing unknown contents within a WDL document. 
+#[derive(Debug)]
+pub struct Unknown(SyntaxToken);
+
+impl AstToken for Unknown {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Unknown)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Unknown {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // NOTE: this is deferred to the entire underlying string simply because
+        // we cannot know a priori what the captured text is.
+        write!(f, "{}", self.0)
+    }
+}
+
+/// A token representing the `version` keyword.
+#[derive(Clone, Debug)]
+pub struct VersionKeyword(SyntaxToken);
+
+impl AstToken for VersionKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::VersionKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for VersionKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "version")
+    }
+}
+
+/// A token representing the `workflow` keyword.
+#[derive(Clone, Debug)]
+pub struct WorkflowKeyword(SyntaxToken);
+
+impl AstToken for WorkflowKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::WorkflowKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for WorkflowKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "workflow")
+    }
+}
diff --git a/wdl-ast/src/v1/workflow.rs b/wdl-ast/src/v1/workflow.rs
index 0096c9c29..2a164a1c1 100644
--- a/wdl-ast/src/v1/workflow.rs
+++ b/wdl-ast/src/v1/workflow.rs
@@ -21,6 +21,7 @@ use crate::WorkflowDescriptionLanguage;
 use crate::support::child;
 use crate::support::children;
 use crate::token;
+use crate::v1::TaskHintsSection;
 
 /// Represents a workflow definition.
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -33,8 +34,8 @@ impl WorkflowDefinition {
     }
 
     /// Gets the items of the workflow.
-    pub fn items(&self) -> AstChildren<WorkflowItem> {
-        children(&self.0)
+    pub fn items(&self) -> impl Iterator<Item = WorkflowItem> {
+        WorkflowItem::children(&self.0)
     }
 
     /// Gets the input section of the workflow.
@@ -48,8 +49,8 @@ impl WorkflowDefinition {
     }
 
     /// Gets the statements of the workflow.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
     }
 
     /// Gets the metadata section of the workflow.
@@ -115,16 +116,18 @@ pub enum WorkflowItem {
     Metadata(MetadataSection),
     /// The item is a parameter meta section.
     ParameterMetadata(ParameterMetadataSection),
-    /// The item is a hints section.
-    Hints(WorkflowHintsSection),
+    /// The item is a task hints section.
+    TaskHints(TaskHintsSection),
+    /// The item is a workflow hints section.
+    WorkflowHints(WorkflowHintsSection),
     /// The item is a private bound declaration.
     Declaration(BoundDecl),
 }
 
-impl AstNode for WorkflowItem {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
+impl WorkflowItem {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`WorkflowItem`].
+ pub fn can_cast(kind: SyntaxKind) -> bool where Self: Sized, { @@ -142,41 +145,325 @@ impl AstNode for WorkflowItem { ) } - fn cast(syntax: SyntaxNode) -> Option + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`WorkflowItem`]. + pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::InputSectionNode => Some(Self::Input(InputSection(syntax))), - SyntaxKind::OutputSectionNode => Some(Self::Output(OutputSection(syntax))), - SyntaxKind::ConditionalStatementNode => { - Some(Self::Conditional(ConditionalStatement(syntax))) - } - SyntaxKind::ScatterStatementNode => Some(Self::Scatter(ScatterStatement(syntax))), - SyntaxKind::CallStatementNode => Some(Self::Call(CallStatement(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } - SyntaxKind::WorkflowHintsSectionNode => Some(Self::Hints(WorkflowHintsSection(syntax))), - SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))), + SyntaxKind::InputSectionNode => Some(Self::Input( + InputSection::cast(syntax).expect("input section to cast"), + )), + SyntaxKind::OutputSectionNode => Some(Self::Output( + OutputSection::cast(syntax).expect("output section to cast"), + )), + SyntaxKind::ConditionalStatementNode => Some(Self::Conditional( + ConditionalStatement::cast(syntax).expect("conditional statement to cast"), + )), + SyntaxKind::ScatterStatementNode => Some(Self::Scatter( + ScatterStatement::cast(syntax).expect("scatter statement to cast"), + )), + SyntaxKind::CallStatementNode => Some(Self::Call( + CallStatement::cast(syntax).expect("call statement to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), + SyntaxKind::TaskHintsSectionNode => Some(Self::TaskHints( + TaskHintsSection::cast(syntax).expect("task hints section to cast"), + )), + SyntaxKind::WorkflowHintsSectionNode => Some(Self::WorkflowHints( + WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"), + )), + SyntaxKind::BoundDeclNode => Some(Self::Declaration( + BoundDecl::cast(syntax).expect("bound decl to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Input(element) => element.syntax(), + Self::Output(element) => element.syntax(), + Self::Conditional(element) => element.syntax(), + Self::Scatter(element) => element.syntax(), + Self::Call(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + Self::TaskHints(element) => element.syntax(), + Self::WorkflowHints(element) => element.syntax(), + Self::Declaration(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`InputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Input`], then a reference to the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
+ pub fn as_input_section(&self) -> Option<&InputSection> { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`InputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Input`], then the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_input_section(self) -> Option { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`OutputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Output`], then a reference to the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_output_section(&self) -> Option<&OutputSection> { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`OutputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Output`], then the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_output_section(self) -> Option { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Conditional`], then a reference to the + /// inner [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_conditional(&self) -> Option<&ConditionalStatement> { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Conditional`], then the inner + /// [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_conditional(self) -> Option { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Scatter`], then a reference to the + /// inner [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_scatter(&self) -> Option<&ScatterStatement> { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Scatter`], then the inner + /// [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_scatter(self) -> Option { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Call`], then a reference to the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_call(&self) -> Option<&CallStatement> { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Call`], then the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
+    pub fn into_call(self) -> Option<CallStatement> {
+        match self {
+            Self::Call(call) => Some(call),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`MetadataSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::Metadata`], then a reference to the
+    ///   inner [`MetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_metadata_section(&self) -> Option<&MetadataSection> {
+        match self {
+            Self::Metadata(metadata_section) => Some(metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`MetadataSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::Metadata`], then the inner
+    ///   [`MetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_metadata_section(self) -> Option<MetadataSection> {
+        match self {
+            Self::Metadata(metadata_section) => Some(metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`ParameterMetadataSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::ParameterMetadata`], then a reference
+    ///   to the inner [`ParameterMetadataSection`] is returned wrapped in
+    ///   [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> {
+        match self {
+            Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner
+    /// [`ParameterMetadataSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::ParameterMetadata`], then the inner
+    ///   [`ParameterMetadataSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_parameter_metadata_section(self) -> Option<ParameterMetadataSection> {
+        match self {
+            Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`TaskHintsSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::TaskHints`], then a reference to the
+    ///   inner [`TaskHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_task_hints_section(&self) -> Option<&TaskHintsSection> {
+        match self {
+            Self::TaskHints(task_hints_section) => Some(task_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`TaskHintsSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::TaskHints`], then the inner
+    ///   [`TaskHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_task_hints_section(self) -> Option<TaskHintsSection> {
+        match self {
+            Self::TaskHints(task_hints_section) => Some(task_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`WorkflowHintsSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::WorkflowHints`], then a reference to
+    ///   the inner [`WorkflowHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_workflow_hints_section(&self) -> Option<&WorkflowHintsSection> {
         match self {
-            Self::Input(i) => &i.0,
-            Self::Output(o) => &o.0,
-            Self::Conditional(s) => &s.0,
-            Self::Scatter(s) => &s.0,
-            Self::Call(s) => &s.0,
-            Self::Metadata(m) => &m.0,
-            Self::ParameterMetadata(m) => &m.0,
-            Self::Hints(h) => &h.0,
-            Self::Declaration(d) => &d.0,
+            Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section),
+            _ => None,
         }
     }
+
+    /// Consumes `self` and attempts to return the inner
+    /// [`WorkflowHintsSection`].
+    ///
+    /// * If `self` is a [`WorkflowItem::WorkflowHints`], then the inner
+    ///   [`WorkflowHintsSection`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_workflow_hints_section(self) -> Option<WorkflowHintsSection> {
+        match self {
+            Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`WorkflowItem::Declaration`], then a reference to the
+    ///   inner [`BoundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_declaration(&self) -> Option<&BoundDecl> {
+        match self {
+            Self::Declaration(declaration) => Some(declaration),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`WorkflowItem::Declaration`], then the inner
+    ///   [`BoundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_declaration(self) -> Option<BoundDecl> {
+        match self {
+            Self::Declaration(declaration) => Some(declaration),
+            _ => None,
+        }
+    }
+
+    /// Finds the first child that can be cast to a [`WorkflowItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`WorkflowItem`] to
+    /// implement the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
+    }
+
+    /// Finds all children that can be cast to a [`WorkflowItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`WorkflowItem`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Self> {
+        syntax.children().filter_map(Self::cast)
+    }
 }
 
 /// Represents a statement in a workflow definition.
@@ -193,6 +480,79 @@ pub enum WorkflowStatement {
 }
 
 impl WorkflowStatement {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`WorkflowStatement`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(
+            kind,
+            SyntaxKind::ConditionalStatementNode
+                | SyntaxKind::ScatterStatementNode
+                | SyntaxKind::CallStatementNode
+                | SyntaxKind::BoundDeclNode
+        )
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`WorkflowStatement`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        match syntax.kind() {
+            SyntaxKind::ConditionalStatementNode => Some(Self::Conditional(
+                ConditionalStatement::cast(syntax).expect("conditional statement to cast"),
+            )),
+            SyntaxKind::ScatterStatementNode => Some(Self::Scatter(
+                ScatterStatement::cast(syntax).expect("scatter statement to cast"),
+            )),
+            SyntaxKind::CallStatementNode => Some(Self::Call(
+                CallStatement::cast(syntax).expect("call statement to cast"),
+            )),
+            SyntaxKind::BoundDeclNode => Some(Self::Declaration(
+                BoundDecl::cast(syntax).expect("bound decl to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Conditional(element) => element.syntax(),
+            Self::Scatter(element) => element.syntax(),
+            Self::Call(element) => element.syntax(),
+            Self::Declaration(element) => element.syntax(),
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`ConditionalStatement`].
+ /// + /// * If `self` is a [`WorkflowStatement::Conditional`], then a reference to + /// the inner [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_conditional(&self) -> Option<&ConditionalStatement> { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Conditional`], then the inner + /// [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_conditional(self) -> Option { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + /// Unwraps the statement into a conditional statement. /// /// # Panics @@ -205,6 +565,31 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Scatter`], then a reference to the + /// inner [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_scatter(&self) -> Option<&ScatterStatement> { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Scatter`], then the inner + /// [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_scatter(self) -> Option { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + /// Unwraps the statement into a scatter statement. /// /// # Panics @@ -217,6 +602,31 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Call`], then a reference to the + /// inner [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_call(&self) -> Option<&CallStatement> { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Call`], then the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_call(self) -> Option { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + /// Unwraps the statement into a call statement. /// /// # Panics @@ -229,57 +639,59 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowStatement::Declaration`], then a reference to + /// the inner [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_declaration(&self) -> Option<&BoundDecl> { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowStatement::Declaration`], then the inner + /// [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_declaration(self) -> Option { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + /// Unwraps the statement into a bound declaration. /// /// # Panics /// /// Panics if the statement is not a bound declaration. 
-    pub fn unwrap_bound_decl(self) -> BoundDecl {
+    pub fn unwrap_declaration(self) -> BoundDecl {
         match self {
-            Self::Declaration(stmt) => stmt,
+            Self::Declaration(declaration) => declaration,
             _ => panic!("not a bound declaration"),
         }
     }
-}
-
-impl AstNode for WorkflowStatement {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(
-            kind,
-            SyntaxKind::ConditionalStatementNode
-                | SyntaxKind::ScatterStatementNode
-                | SyntaxKind::CallStatementNode
-                | SyntaxKind::BoundDeclNode
-        )
-    }
-
-    fn cast(syntax: SyntaxNode) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        match syntax.kind() {
-            SyntaxKind::ConditionalStatementNode => {
-                Some(Self::Conditional(ConditionalStatement(syntax)))
-            }
-            SyntaxKind::ScatterStatementNode => Some(Self::Scatter(ScatterStatement(syntax))),
-            SyntaxKind::CallStatementNode => Some(Self::Call(CallStatement(syntax))),
-            SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))),
-            _ => None,
-        }
+    /// Finds the first child that can be cast to a [`WorkflowStatement`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`WorkflowStatement`]
+    /// to implement the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
     }
 
-    fn syntax(&self) -> &SyntaxNode {
-        match self {
-            Self::Conditional(s) => &s.0,
-            Self::Scatter(s) => &s.0,
-            Self::Call(s) => &s.0,
-            Self::Declaration(d) => &d.0,
-        }
+    /// Finds all children that can be cast to a [`WorkflowStatement`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring
+    /// [`WorkflowStatement`] to implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Self> {
+        syntax.children().filter_map(Self::cast)
     }
 }
 
@@ -290,12 +702,12 @@ pub struct ConditionalStatement(pub(crate) SyntaxNode);
 impl ConditionalStatement {
     /// Gets the expression of the conditional statement
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("expected a conditional expression")
+        Expr::child(&self.0).expect("expected a conditional expression")
     }
 
     /// Gets the statements of the conditional body.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
     }
 }
 
@@ -336,12 +748,12 @@ impl ScatterStatement {
 
     /// Gets the scatter expression.
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("expected a scatter expression")
+        Expr::child(&self.0).expect("expected a scatter expression")
     }
 
     /// Gets the statements of the scatter body.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
    }
 }
 
@@ -547,7 +959,7 @@ impl CallInputItem {
 
     /// The optional expression for the input.
     pub fn expr(&self) -> Option<Expr> {
-        child(&self.0)
+        Expr::child(&self.0)
     }
 
     /// Gets the call statement for the call input item.
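A minimal sketch of a call site for the new inherent traversal API shown above. The helper below is illustrative only and is not part of the patch; it assumes `WorkflowDefinition` and `WorkflowStatement` are re-exported from `wdl_ast::v1`, as the `use crate::v1::TaskHintsSection;` import in the hunk suggests:

use wdl_ast::v1::WorkflowDefinition;
use wdl_ast::v1::WorkflowStatement;

/// Counts the `call` statements that appear at the top level of a workflow.
///
/// Calls nested inside `if` or `scatter` blocks are not visited; recursing
/// into those would use `as_conditional`/`as_scatter` and `statements()`.
fn count_top_level_calls(workflow: &WorkflowDefinition) -> usize {
    workflow
        .statements()
        .filter_map(WorkflowStatement::into_call)
        .count()
}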
diff --git a/wdl-ast/src/visitor.rs b/wdl-ast/src/visitor.rs index 0c8bd9de9..48653aa07 100644 --- a/wdl-ast/src/visitor.rs +++ b/wdl-ast/src/visitor.rs @@ -23,7 +23,6 @@ use rowan::WalkEvent; -use crate::AstNode; use crate::AstToken as _; use crate::Comment; use crate::Document; @@ -426,11 +425,16 @@ pub(crate) fn visit(root: &SyntaxNode, state: &mut V::State, visitor SyntaxKind::LiteralNullNode => { // Skip these nodes as they're part of a metadata section } - k if Expr::can_cast(k) => visitor.expr( - state, - reason, - &Expr::cast(element.into_node().unwrap()).expect("node should cast"), - ), + k if Expr::can_cast(k) => { + visitor.expr( + state, + reason, + &Expr::cast(element.into_node().expect( + "any element that is able to be turned into an expr should be a node", + )) + .expect("expr should be built"), + ) + } SyntaxKind::LiteralMapItemNode | SyntaxKind::LiteralObjectItemNode | SyntaxKind::LiteralStructItemNode diff --git a/wdl-ast/tests/validation.rs b/wdl-ast/tests/validation.rs index 071a707e4..eb460ee58 100644 --- a/wdl-ast/tests/validation.rs +++ b/wdl-ast/tests/validation.rs @@ -31,6 +31,7 @@ use wdl_ast::Diagnostic; use wdl_ast::Document; use wdl_ast::Validator; +/// Finds tests for grammar validation. fn find_tests() -> Vec { // Check for filter arguments consisting of test names let mut filter = HashSet::new(); @@ -58,6 +59,7 @@ fn find_tests() -> Vec { tests } +/// Normalizes a result. fn normalize(s: &str, is_error: bool) -> String { if is_error { // Normalize paths in any error messages @@ -68,6 +70,7 @@ fn normalize(s: &str, is_error: bool) -> String { s.replace("\r\n", "\n") } +/// Formats diagnostics. fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String { let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source); let mut buffer = Buffer::no_color(); @@ -84,6 +87,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String::from_utf8(buffer.into_inner()).expect("should be UTF-8") } +/// Compares a single result. fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> { let result = normalize(result, is_error); if env::var_os("BLESS").is_some() { @@ -115,6 +119,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), Strin Ok(()) } +/// Runs a test. fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { let path = test.join("source.wdl"); let source = std::fs::read_to_string(&path) diff --git a/wdl-config/src/loader.rs b/wdl-config/src/loader.rs new file mode 100644 index 000000000..6d7e605e2 --- /dev/null +++ b/wdl-config/src/loader.rs @@ -0,0 +1,127 @@ +use std::collections::VecDeque; +use std::convert::Infallible; +use std::path::PathBuf; + +use config::ConfigError; +use config::Environment; +use config::File; + +use crate::providers::EnvProvider; +use crate::providers::FileProvider; +use crate::BoxedProvider; +use crate::Config; +use crate::Provider; +use crate::CONFIG_SEARCH_PATHS; + +#[derive(Debug)] +pub enum Error { + /// An error from the `config` crate. + Config(ConfigError), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Config(err) => write!(f, "`config` error: {err}"), + } + } +} + +impl std::error::Error for Error {} + +/// A [`Result`](std::result::Result) with an [`Error`]. +pub type Result = std::result::Result; + +pub struct Loader(VecDeque); + +impl Loader { + /// Creates an empty [`Loader`]. 
+    pub fn empty() -> Self {
+        Self(VecDeque::new())
+    }
+
+    /// Adds the default configuration to the front of the provider stack.
+    pub fn with_default_configuration(mut self) -> Self {
+        // NOTE: default configuration should always be the first provider evaluated.
+        self.0.push_front(Config::default().into());
+        self
+    }
+
+    /// Adds a file to the search path of the [`Loader`].
+    ///
+    /// Note that the file is not required to be present.
+    pub fn add_optional_file(mut self, path: PathBuf) -> Self {
+        self.0.push_back(FileProvider::optional(path).into());
+        self
+    }
+
+    /// Adds a file to the search path of the [`Loader`].
+    ///
+    /// Note that the file is required to be present.
+    pub fn add_required_file(mut self, path: PathBuf) -> Self {
+        self.0.push_back(FileProvider::required(path).into());
+        self
+    }
+
+    /// Adds the default search paths to the [`Loader`].
+    pub fn with_default_search_paths(mut self) -> Self {
+        for path in CONFIG_SEARCH_PATHS.clone().into_iter() {
+            self = self.add_optional_file(path);
+        }
+
+        self
+    }
+
+    /// Adds a new environment prefix to the [`Loader`].
+    pub fn add_env_prefix(mut self, prefix: &str) -> Self {
+        self.0.push_back(EnvProvider::new(prefix).into());
+        self
+    }
+
+    /// Adds the default environment prefix to the [`Loader`].
+    pub fn with_default_env_prefix(mut self) -> Self {
+        self.0.push_back(EnvProvider::default().into());
+        self
+    }
+
+    /// Gets a reference to the inner queue of providers.
+    pub fn inner(&self) -> &VecDeque<BoxedProvider> {
+        &self.0
+    }
+
+    /// Consumes `self` and returns the inner queue of providers.
+    pub fn into_inner(self) -> VecDeque<BoxedProvider> {
+        self.0
+    }
+
+    /// Consumes `self` and attempts to load the [`Config`].
+    ///
+    /// Providers are evaluated front to back, with later providers taking
+    /// precedence over earlier ones.
+    pub fn try_load(self) -> Result<Config> {
+        // NOTE: `provide` is assumed to return the full configuration with
+        // this provider's values applied, so the result of the last provider
+        // in the queue wins.
+        let mut config = Config::default();
+
+        for provider in self.0 {
+            config = provider.provide().map_err(Error::Config)?;
+        }
+
+        Ok(config)
+    }
+}
+
+impl Default for Loader {
+    fn default() -> Self {
+        Self::empty()
+            .with_default_search_paths()
+            .with_default_env_prefix()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Loader;
+
+    #[test]
+    fn an_empty_loader_unwraps() {
+        Loader::empty();
+    }
+}
diff --git a/wdl-format/Cargo.toml b/wdl-format/Cargo.toml
new file mode 100644
index 000000000..03a435cdf
--- /dev/null
+++ b/wdl-format/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "wdl-format"
+version = "0.1.0"
+license.workspace = true
+edition.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[dependencies]
+wdl-ast = { path = "../wdl-ast", version = "0.7.1" }
+nonempty.workspace = true
+
+[lints]
+workspace = true
diff --git a/wdl-format/src/config.rs b/wdl-format/src/config.rs
new file mode 100644
index 000000000..15ba2e5de
--- /dev/null
+++ b/wdl-format/src/config.rs
@@ -0,0 +1,21 @@
+//! Configuration for formatting.
+
+mod builder;
+mod indent;
+
+pub use builder::Builder;
+pub use indent::Indent;
+
+/// Configuration for formatting.
+#[derive(Debug, Default)]
+pub struct Config {
+    /// The indentation to use.
+    indent: Indent,
+}
+
+impl Config {
+    /// Gets the indentation of the configuration.
+    pub fn indent(&self) -> Indent {
+        self.indent
+    }
+}
diff --git a/wdl-format/src/config/builder.rs b/wdl-format/src/config/builder.rs
new file mode 100644
index 000000000..d9fe01958
--- /dev/null
+++ b/wdl-format/src/config/builder.rs
@@ -0,0 +1,61 @@
+//! Builders for formatting configuration.
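+//!
+//! # Examples
+//!
+//! A minimal usage sketch of the builder below (the four-space indentation
+//! here is purely illustrative, not a recommended default):
+//!
+//! ```
+//! use std::num::NonZeroUsize;
+//!
+//! use wdl_format::Config;
+//! use wdl_format::config::Builder;
+//! use wdl_format::config::Indent;
+//!
+//! let config: Config = Builder::default()
+//!     .indent(Indent::Spaces(NonZeroUsize::new(4).unwrap()))
+//!     .try_build()
+//!     .expect("a builder with an indent set should build");
+//! ```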
+ +use crate::Config; +use crate::config::Indent; + +/// An error related to a [`Builder`]. +#[derive(Debug)] +pub enum Error { + /// A required value was missing for a builder field. + Missing(&'static str), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Missing(field) => write!( + f, + "missing required value for '{field}' in a formatter configuration builder" + ), + } + } +} + +impl std::error::Error for Error {} + +/// A [`Result`](std::result::Result) with an [`Error`]. +pub type Result = std::result::Result; + +/// A builder for a [`Config`]. +pub struct Builder { + /// The number of characters to indent. + indent: Option, +} + +impl Builder { + /// Sets the indentation level. + /// + /// # Notes + /// + /// This silently overwrites any previously provided value for the + /// indentation level. + pub fn indent(mut self, indent: Indent) -> Self { + self.indent = Some(indent); + self + } + + /// Consumes `self` and attempts to build a [`Config`]. + pub fn try_build(self) -> Result { + let indent = self.indent.ok_or(Error::Missing("indent"))?; + + Ok(Config { indent }) + } +} + +impl Default for Builder { + fn default() -> Self { + Self { + indent: Some(Default::default()), + } + } +} diff --git a/wdl-format/src/config/indent.rs b/wdl-format/src/config/indent.rs new file mode 100644 index 000000000..528f3858c --- /dev/null +++ b/wdl-format/src/config/indent.rs @@ -0,0 +1,24 @@ +//! Indentation within formatting configuration. + +use std::num::NonZeroUsize; +use std::sync::LazyLock; + +/// The default indentation. +pub static DEFAULT_INDENT: LazyLock = + LazyLock::new(|| Indent::Spaces(NonZeroUsize::new(2).unwrap())); + +/// An indentation level. +#[derive(Clone, Copy, Debug)] +pub enum Indent { + /// Tabs. + Tabs(NonZeroUsize), + + /// Spaces. + Spaces(NonZeroUsize), +} + +impl Default for Indent { + fn default() -> Self { + *DEFAULT_INDENT + } +} diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs new file mode 100644 index 000000000..ac48f901e --- /dev/null +++ b/wdl-format/src/element.rs @@ -0,0 +1,508 @@ +//! Elements used during formatting. + +use std::collections::HashMap; +use std::iter::Peekable; + +use nonempty::NonEmpty; +use wdl_ast::AstToken as _; +use wdl_ast::Element; +use wdl_ast::Node; +use wdl_ast::SyntaxKind; + +use crate::NEWLINE; +use crate::PreToken; +use crate::TokenStream; +use crate::Writable; + +pub mod node; + +/// Trivia associated with some more formidable element. +/// +/// Trivia would be things like comments and whitespace. +#[derive(Clone, Debug, Default)] +pub struct Trivia { + /// Any preceeding trivia. + preceeding: Option>>, + + /// Any inline trivia. + inline: Option>>, +} + +impl Trivia { + /// Any preceeding trivia that are not whitespaces. + pub fn preceeding(&self) -> Option> { + self.preceeding.as_ref().map(|trivia| { + trivia + .into_iter() + .filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) + .map(|t| &**t) + }) + } + + /// Any inline trivia that are not whitespaces. + pub fn inline(&self) -> Option> { + self.inline.as_ref().map(|trivia| { + trivia + .into_iter() + .filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) + .map(|t| &**t) + }) + } +} + +/// A formattable element. +#[derive(Clone, Debug)] +pub struct FormatElement { + /// The inner element. + element: Element, + + /// Trivia associated with the element. + trivia: Trivia, + + /// Children as format elements. 
+ children: Option>>, +} + +impl FormatElement { + /// Creates a new [`FormatElement`]. + pub fn new( + element: Element, + trivia: Trivia, + children: Option>>, + ) -> Self { + Self { + element, + trivia, + children, + } + } + + /// Gets the inner element. + pub fn element(&self) -> &Element { + &self.element + } + + /// Gets the trivia. + pub fn trivia(&self) -> &Trivia { + &self.trivia + } + + /// Gets the children for this node. + pub fn children(&self) -> Option> { + self.children + .as_ref() + .map(|children| children.into_iter().map(|child| &**child)) + } + + /// Collects all of the children into a hashmap based on their + /// [`SyntaxKind`]. This is often useful when formatting if you want to, + /// say, iterate through all children of a certain kind. + /// + /// # Notes + /// + /// * This clones the underlying children. It's meant to be a cheap clone, + /// but you should be aware of the (relatively small) performance hit. + pub fn children_by_kind(&self) -> HashMap> { + let mut results = HashMap::new(); + + if let Some(children) = self.children() { + for child in children { + results + .entry(child.element().kind()) + .or_insert(Vec::new()) + // NOTE: this clone is very cheap, as the underlying + // elements are mostly reference counts. + .push(child.to_owned()) + } + } + + results + } + + /// Writes any preceeding trivia to the stream. + pub fn write_preceeding_trivia(&self, stream: &mut TokenStream) { + if let Some(trivia) = self.trivia().preceeding() { + for t in trivia.filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) { + t.write(stream); + } + } + } + + /// Writes any inline trivia to the stream. + pub fn write_inline_trivia(&self, stream: &mut TokenStream) { + if let Some(trivia) = self.trivia().inline() { + for t in trivia.filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) { + t.write(stream); + } + } + } +} + +/// An extension trait for formatting [`Element`]s. +pub trait AstElementFormatExt { + /// Consumes `self` and returns the [`Element`] as a [`FormatElement`]. + fn into_format_element(self) -> FormatElement; +} + +impl AstElementFormatExt for Element { + fn into_format_element(self) -> FormatElement + where + Self: Sized, + { + let children = match &self { + Element::Node(node) => collate(node), + Element::Token(_) => None, + }; + + FormatElement::new(self, Default::default(), children) + } +} + +/// Collects a list of iterables into an [`Option`]. +fn collect_optional(mut iter: impl Iterator) -> Option> { + if let Some(first) = iter.next() { + let mut vec = NonEmpty::new(first); + vec.extend(iter); + Some(vec) + } else { + None + } +} + +/// Takes elements while a particular predicate is true _without_ consuming the +/// element that breaks the chain. +fn take_while_peek<'a, I, P>( + iter: &'a mut Peekable, + predicate: P, +) -> impl Iterator + 'a +where + I: Iterator, + P: Fn(&I::Item) -> bool + 'a, +{ + std::iter::from_fn(move || { + if let Some(next_item) = iter.peek() { + if predicate(next_item) { + iter.next() + } else { + None + } + } else { + None + } + }) +} + +/// Collates the children of a particular node. 
+fn collate(node: &Node) -> Option>> { + let mut results = Vec::new(); + let mut stream = node + .syntax() + .children_with_tokens() + .map(Element::cast) + .peekable(); + + while stream.peek().is_some() { + let preceeding = collect_optional( + take_while_peek(stream.by_ref(), |node| node.is_trivia()) + .map(|item| Box::new(item.into_format_element())), + ); + + let element = match stream.next() { + Some(node) => node, + None => break, + }; + + let inline = collect_optional( + take_while_peek(stream.by_ref(), |element| { + if element.is_trivia() { + // If the element is trivia, we need to check if it contains a + // newline. + match element { + Element::Node(_) => { + // SAFETY: if this is reached, then the code needs to be + // altered. The fact that nodes should not be trivia is + // not baked into the code per se, but it's not expected + // to ever occur. If this ends up happening and it makes + // sense to change this, feel free to do so. + unreachable!("nodes should not be trivia") + } + Element::Token(token) => { + // NOTE: if the token _is_ whitespace, then return false + // only if the token contains a newline. Else, this + // should continue consuming the whitespace. + token + .as_whitespace() + .map(|whitespace| !whitespace.syntax().text().contains(NEWLINE)) + .unwrap_or(true) + } + } + } else { + // If the element isn't trivia, we don't consume it. + false + } + }) + .map(|item| Box::new(item.into_format_element())), + ); + + let children = match element { + Element::Node(ref node) => collate(node), + Element::Token(_) => None, + }; + + results.push(Box::new(FormatElement { + element, + trivia: Trivia { preceeding, inline }, + children, + })); + } + + if !results.is_empty() { + let mut results = results.into_iter(); + // SAFETY: we just checked to ensure that `results` wasn't empty, so + // this will always unwrap. + let mut children = NonEmpty::new(results.next().unwrap()); + children.extend(results); + Some(children) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use wdl_ast::Document; + use wdl_ast::Node; + use wdl_ast::SyntaxKind; + + use crate::element::node::AstNodeFormatExt; + + #[test] + fn smoke() { + let (document, diagnostics) = Document::parse( + "version 1.2 + +# This is a comment attached to the task. +task foo # This is an inline comment on the task ident. +{ + +} # This is an inline comment on the task. + +# This is a comment attached to the workflow. +workflow bar # This is an inline comment on the workflow ident. +{ + # This is attached to the call. 
+ call foo {} +} # This is an inline comment on the workflow.", + ); + + assert!(diagnostics.is_empty()); + let document = document.ast().into_v1().unwrap(); + + let format_element = Node::Ast(document).into_format_element(); + let mut children = format_element.children().unwrap(); + + //////////////////////////////////////////////////////////////////////////////// + // Version statement + //////////////////////////////////////////////////////////////////////////////// + + let version = children.next().expect("version statement element"); + assert_eq!( + version.element().syntax().kind(), + SyntaxKind::VersionStatementNode + ); + + assert!(version.trivia().preceeding().is_none()); + assert!(version.trivia().inline().is_none()); + + let mut version_children = version.children().unwrap(); + assert_eq!( + version_children.next().unwrap().element().kind(), + SyntaxKind::VersionKeyword + ); + assert_eq!( + version_children.next().unwrap().element().kind(), + SyntaxKind::Version + ); + + //////////////////////////////////////////////////////////////////////////////// + // Task Definition + //////////////////////////////////////////////////////////////////////////////// + + let task = children.next().expect("task element"); + assert_eq!( + task.element().syntax().kind(), + SyntaxKind::TaskDefinitionNode + ); + + // Preceeding. + + let mut preceeding = task.trivia().preceeding().unwrap(); + + let comment = preceeding + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(comment.kind(), SyntaxKind::Comment); + assert_eq!(comment.text(), "# This is a comment attached to the task."); + + // Inline. + + let mut inline = task.trivia().inline().unwrap(); + + let comment = inline + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(comment.kind(), SyntaxKind::Comment); + assert_eq!(comment.text(), "# This is an inline comment on the task."); + + assert!(inline.next().is_none()); + + // Children. + + let mut task_children = task.children().unwrap(); + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::TaskKeyword + ); + + let ident = task_children.next().unwrap(); + assert_eq!(ident.element().kind(), SyntaxKind::Ident); + + let mut ident_inline = ident.trivia().inline().unwrap(); + + let inline_comment = ident_inline + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(inline_comment.kind(), SyntaxKind::Comment); + assert_eq!( + inline_comment.text(), + "# This is an inline comment on the task ident." + ); + + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::OpenBrace + ); + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::CloseBrace + ); + + assert!(task_children.next().is_none()); + + //////////////////////////////////////////////////////////////////////////////// + // Workflow Definition + //////////////////////////////////////////////////////////////////////////////// + + let workflow = children.next().expect("workflow element"); + assert_eq!( + workflow.element().syntax().kind(), + SyntaxKind::WorkflowDefinitionNode + ); + + // Preceeding. + + let mut preceeding = workflow.trivia().preceeding().unwrap(); + + let comment = preceeding + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(comment.kind(), SyntaxKind::Comment); + assert_eq!( + comment.text(), + "# This is a comment attached to the workflow." + ); + + // Inline. 
+ + let mut inline = workflow.trivia().inline().unwrap(); + + let comment = inline + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(comment.kind(), SyntaxKind::Comment); + assert_eq!( + comment.text(), + "# This is an inline comment on the workflow." + ); + + assert!(inline.next().is_none()); + + // Children. + + let mut workflow_children = workflow.children().unwrap(); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::WorkflowKeyword + ); + + let ident = workflow_children.next().unwrap(); + assert_eq!(ident.element().kind(), SyntaxKind::Ident); + + let mut ident_inline = ident.trivia().inline().unwrap(); + + let inline_comment = ident_inline + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(inline_comment.kind(), SyntaxKind::Comment); + assert_eq!( + inline_comment.text(), + "# This is an inline comment on the workflow ident." + ); + + assert!(ident_inline.next().is_none()); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::OpenBrace + ); + + let call = workflow_children.next().unwrap(); + assert_eq!(call.element().kind(), SyntaxKind::CallStatementNode); + + let mut call_preceeding = call.trivia().preceeding().unwrap(); + + let comment = call_preceeding + .next() + .unwrap() + .element() + .syntax() + .into_token() + .unwrap(); + assert_eq!(comment.kind(), SyntaxKind::Comment); + assert_eq!(comment.text(), "# This is attached to the call."); + + assert!(call_preceeding.next().is_none()); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::CloseBrace + ); + + assert!(workflow_children.next().is_none()); + } +} diff --git a/wdl-format/src/element/node.rs b/wdl-format/src/element/node.rs new file mode 100644 index 000000000..0e96bb689 --- /dev/null +++ b/wdl-format/src/element/node.rs @@ -0,0 +1,23 @@ +//! A wrapper for formatting [`AstNode`]s. + +use wdl_ast::Element; +use wdl_ast::Node; + +use crate::element::FormatElement; +use crate::element::collate; + +/// An extension trait for formatting [`Node`]s. +pub trait AstNodeFormatExt { + /// Consumes `self` and returns the [`Node`] as a [`FormatElement`]. + fn into_format_element(self) -> FormatElement; +} + +impl AstNodeFormatExt for Node { + fn into_format_element(self) -> FormatElement + where + Self: Sized, + { + let children = collate(&self); + FormatElement::new(Element::Node(self), Default::default(), children) + } +} diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs new file mode 100644 index 000000000..c862ac472 --- /dev/null +++ b/wdl-format/src/lib.rs @@ -0,0 +1,227 @@ +//! Formatting facilities for WDL. + +pub mod config; +pub mod element; +mod token; +pub mod v1; + +use std::fmt::Write; + +pub use config::Config; +pub use token::*; +use wdl_ast::Element; +use wdl_ast::Node as AstNode; + +use crate::element::FormatElement; + +/// Newline constant used for formatting on windows platforms. +#[cfg(windows)] +pub const NEWLINE: &str = "\r\n"; +/// Newline constant used for formatting on non-windows platforms. +#[cfg(not(windows))] +pub const NEWLINE: &str = "\n"; + +/// A space. +pub const SPACE: &str = " "; + +/// Returns exactly one entity from an enumerable list of entities (usually a +/// [`Vec`]). +#[macro_export] +macro_rules! 
exactly_one { + ($entities:expr, $name:expr) => { + match $entities.len() { + 0 => unreachable!("we should never have zero {}", $name), + // SAFETY: we just checked to ensure that exactly + // one element exists, so this will always unwrap. + 1 => $entities.pop().unwrap(), + _ => unreachable!("we should not have two or more {}", $name), + } + }; +} + +/// An element that can be written to a token stream. +pub trait Writable { + /// Writes the element to the token stream. + fn write(&self, stream: &mut TokenStream); +} + +impl Writable for &FormatElement { + fn write(&self, stream: &mut TokenStream) { + self.write_preceeding_trivia(stream); + + match self.element() { + Element::Node(node) => match node { + AstNode::AccessExpr(_) => todo!(), + AstNode::AdditionExpr(_) => todo!(), + AstNode::ArrayType(_) => todo!(), + AstNode::Ast(_) => v1::format_ast(self, stream), + AstNode::BoundDecl(_) => todo!(), + AstNode::CallAfter(_) => todo!(), + AstNode::CallAlias(_) => todo!(), + AstNode::CallExpr(_) => todo!(), + AstNode::CallInputItem(_) => todo!(), + AstNode::CallStatement(_) => { + v1::workflow::call::format_call_statement(self, stream) + } + AstNode::CallTarget(_) => v1::workflow::call::format_call_target(self, stream), + AstNode::CommandSection(_) => todo!(), + AstNode::ConditionalStatement(_) => todo!(), + AstNode::DefaultOption(_) => todo!(), + AstNode::DivisionExpr(_) => todo!(), + AstNode::EqualityExpr(_) => todo!(), + AstNode::ExponentiationExpr(_) => todo!(), + AstNode::GreaterEqualExpr(_) => todo!(), + AstNode::GreaterExpr(_) => todo!(), + AstNode::IfExpr(_) => todo!(), + AstNode::ImportAlias(_) => todo!(), + AstNode::ImportStatement(_) => todo!(), + AstNode::IndexExpr(_) => todo!(), + AstNode::InequalityExpr(_) => todo!(), + AstNode::InputSection(_) => todo!(), + AstNode::LessEqualExpr(_) => todo!(), + AstNode::LessExpr(_) => todo!(), + AstNode::LiteralArray(_) => todo!(), + AstNode::LiteralBoolean(_) => todo!(), + AstNode::LiteralFloat(_) => todo!(), + AstNode::LiteralHints(_) => todo!(), + AstNode::LiteralHintsItem(_) => todo!(), + AstNode::LiteralInput(_) => todo!(), + AstNode::LiteralInputItem(_) => todo!(), + AstNode::LiteralInteger(_) => todo!(), + AstNode::LiteralMap(_) => todo!(), + AstNode::LiteralMapItem(_) => todo!(), + AstNode::LiteralNone(_) => todo!(), + AstNode::LiteralNull(_) => todo!(), + AstNode::LiteralObject(_) => todo!(), + AstNode::LiteralObjectItem(_) => todo!(), + AstNode::LiteralOutput(_) => todo!(), + AstNode::LiteralOutputItem(_) => todo!(), + AstNode::LiteralPair(_) => todo!(), + AstNode::LiteralString(_) => todo!(), + AstNode::LiteralStruct(_) => todo!(), + AstNode::LiteralStructItem(_) => todo!(), + AstNode::LogicalAndExpr(_) => todo!(), + AstNode::LogicalNotExpr(_) => todo!(), + AstNode::LogicalOrExpr(_) => todo!(), + AstNode::MapType(_) => todo!(), + AstNode::MetadataArray(_) => todo!(), + AstNode::MetadataObject(_) => todo!(), + AstNode::MetadataObjectItem(_) => todo!(), + AstNode::MetadataSection(_) => todo!(), + AstNode::ModuloExpr(_) => todo!(), + AstNode::MultiplicationExpr(_) => todo!(), + AstNode::NameRef(_) => todo!(), + AstNode::NegationExpr(_) => todo!(), + AstNode::OutputSection(_) => todo!(), + AstNode::PairType(_) => todo!(), + AstNode::ObjectType(_) => todo!(), + AstNode::ParameterMetadataSection(_) => todo!(), + AstNode::ParenthesizedExpr(_) => todo!(), + AstNode::Placeholder(_) => todo!(), + AstNode::PrimitiveType(_) => todo!(), + AstNode::RequirementsItem(_) => todo!(), + AstNode::RequirementsSection(_) => todo!(), + 
+                AstNode::RuntimeItem(_) => todo!(),
+                AstNode::RuntimeSection(_) => todo!(),
+                AstNode::ScatterStatement(_) => todo!(),
+                AstNode::SepOption(_) => todo!(),
+                AstNode::StructDefinition(_) => todo!(),
+                AstNode::SubtractionExpr(_) => todo!(),
+                AstNode::TaskDefinition(_) => v1::task::format_task_definition(self, stream),
+                AstNode::TaskHintsItem(_) => todo!(),
+                AstNode::TaskHintsSection(_) => todo!(),
+                AstNode::TrueFalseOption(_) => todo!(),
+                AstNode::TypeRef(_) => todo!(),
+                AstNode::UnboundDecl(_) => todo!(),
+                AstNode::VersionStatement(_) => v1::format_version_statement(self, stream),
+                AstNode::WorkflowDefinition(_) => {
+                    v1::workflow::format_workflow_definition(self, stream)
+                }
+                AstNode::WorkflowHintsItem(_) => todo!(),
+                AstNode::WorkflowHintsSection(_) => todo!(),
+            },
+            Element::Token(token) => {
+                stream.push_ast_token(token);
+            }
+        }
+
+        self.write_inline_trivia(stream);
+    }
+}
+
+/// A formatter.
+#[derive(Debug, Default)]
+pub struct Formatter {
+    /// The configuration.
+    config: Config,
+}
+
+impl Formatter {
+    /// Creates a new formatter.
+    pub fn new(config: Config) -> Self {
+        Self { config }
+    }
+
+    /// Gets the configuration for this formatter.
+    pub fn config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Formats an element.
+    pub fn format<W: Writable>(&self, element: W) -> std::result::Result<String, std::fmt::Error> {
+        let mut result = String::new();
+
+        for token in self.to_stream(element) {
+            write!(result, "{token}")?;
+        }
+
+        Ok(result)
+    }
+
+    /// Gets the [`PostToken`] stream.
+    ///
+    /// # Notes
+    ///
+    /// * This shouldn't be exposed publicly.
+    fn to_stream<W: Writable>(&self, element: W) -> TokenStream<PostToken> {
+        let mut stream = TokenStream::default();
+        element.write(&mut stream);
+
+        let mut postprocessor = Postprocessor::default();
+        postprocessor.run(stream)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt as _;
+
+    #[test]
+    fn smoke() {
+        let (document, diagnostics) = Document::parse(
+            "version 1.2
+
+# This is a comment attached to the task.
+task foo # This is an inline comment on the task ident.
+{
+
+} # This is an inline comment on the task.
+
+# This is a comment attached to the workflow.
+workflow bar # This is an inline comment on the workflow ident.
+{
+    # This is attached to the call.
+    call foo {}
+} # This is an inline comment on the workflow.",
+        );
+
+        assert!(diagnostics.is_empty());
+        let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let stream = Formatter::default().to_stream(&document).to_string();
+        println!("{stream}");
+    }
+}
diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
new file mode 100644
index 000000000..82d17003c
--- /dev/null
+++ b/wdl-format/src/token.rs
@@ -0,0 +1,69 @@
+//! Tokens used during formatting.
+
+mod post;
+mod pre;
+
+use std::fmt::Display;
+
+pub use post::*;
+pub use pre::*;
+
+/// Tokens that are streamable.
+pub trait Token: Display + Eq + PartialEq {}
+
+/// A stream of tokens. Tokens in this case are either [`PreToken`]s or
+/// [`PostToken`]s. Note that, unless you are working on formatting
+/// specifically, you should never need to work with [`PostToken`]s.
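+///
+/// A minimal sketch of the two-stage pipeline (illustrative only, not
+/// compiled as a doc-test; `element` is assumed to be a `&FormatElement`):
+///
+/// ```ignore
+/// // Elements write `PreToken`s; the postprocessor then lowers them into
+/// // concrete `PostToken`s (spaces, newlines, and literals) for rendering.
+/// let mut pre = TokenStream::<PreToken>::default();
+/// element.write(&mut pre);
+/// let post: TokenStream<PostToken> = Postprocessor::default().run(pre);
+/// print!("{post}");
+/// ```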
+#[derive(Debug)]
+
+pub struct TokenStream<T: Token>(Vec<T>);
+
+impl<T: Token> Default for TokenStream<T> {
+    fn default() -> Self {
+        Self(Default::default())
+    }
+}
+
+impl<T: Token> std::fmt::Display for TokenStream<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for value in &self.0 {
+            write!(f, "{value}")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<T: Token> TokenStream<T> {
+    /// Pushes a token into the stream.
+    pub fn push(&mut self, token: T) {
+        self.0.push(token);
+    }
+
+    /// Removes any number of `token`s at the end of the stream.
+    pub fn trim_end(&mut self, token: &T) {
+        while Some(token) == self.0.last() {
+            let _ = self.0.pop();
+        }
+    }
+
+    /// Removes any number of tokens matching `predicate` at the end of the
+    /// stream.
+    pub fn trim_while<F: Fn(&T) -> bool>(&mut self, predicate: F) {
+        while let Some(token) = self.0.last() {
+            if !predicate(token) {
+                break;
+            }
+
+            let _ = self.0.pop();
+        }
+    }
+}
+
+impl<T: Token> IntoIterator for TokenStream<T> {
+    type IntoIter = std::vec::IntoIter<T>;
+    type Item = T;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
+    }
+}
diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
new file mode 100644
index 000000000..20b7f2971
--- /dev/null
+++ b/wdl-format/src/token/post.rs
@@ -0,0 +1,118 @@
+//! Postprocessed tokens.
+//!
+//! Generally speaking, unless you are working with the internals of code
+//! formatting, you're not going to be working with these.
+
+use wdl_ast::SyntaxKind;
+
+use crate::NEWLINE;
+use crate::PreToken;
+use crate::SPACE;
+use crate::Token;
+use crate::TokenStream;
+
+/// A postprocessed token.
+///
+/// Note that this will be transformed into a [`TokenStream`](super::TokenStream)
+/// of [`PostToken`](super::PostToken)s by a
+/// [`Postprocessor`](super::Postprocessor) (authors of elements are never
+/// expected to write [`PostToken`](super::PostToken)s directly).
+#[derive(Eq, PartialEq)]
+pub enum PostToken {
+    /// A space.
+    Space,
+
+    /// A newline.
+    Newline,
+
+    /// A string literal.
+    Literal(String),
+}
+
+impl std::fmt::Debug for PostToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Space => write!(f, "<SPACE>"),
+            Self::Newline => write!(f, "<NEWLINE>"),
+            Self::Literal(value) => write!(f, "<LITERAL> {value}"),
+        }
+    }
+}
+
+impl std::fmt::Display for PostToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PostToken::Space => write!(f, "{SPACE}"),
+            PostToken::Newline => write!(f, "{NEWLINE}"),
+            PostToken::Literal(value) => write!(f, "{value}"),
+        }
+    }
+}
+
+impl Token for PostToken {}
+
+/// The state of the postprocessor.
+#[derive(Default, Eq, PartialEq)]
+enum State {
+    /// The start of a line in the document.
+    #[default]
+    StartOfLine,
+
+    /// The middle of a line.
+    MiddleOfLine,
+}
+
+/// A postprocessor of [tokens](PreToken).
+#[derive(Default)]
+pub struct Postprocessor(State);
+
+impl Postprocessor {
+    /// Runs the postprocessor.
+    pub fn run(&mut self, input: TokenStream<PreToken>) -> TokenStream<PostToken> {
+        let mut output = TokenStream::<PostToken>::default();
+
+        for token in input {
+            self.step(token, &mut output)
+        }
+
+        output.trim_while(|token| matches!(token, PostToken::Space | PostToken::Newline));
+        output.push(PostToken::Newline);
+
+        output
+    }
+
+    /// Takes a step of a [`PreToken`] stream and processes the appropriate
+    /// [`PostToken`]s.
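+    ///
+    /// A sketch of the intended lowering (illustrative only, not compiled as
+    /// a doc-test):
+    ///
+    /// ```ignore
+    /// let mut post = TokenStream::<PostToken>::default();
+    /// let mut pp = Postprocessor::default();
+    /// // A non-comment literal is pushed and followed by a space.
+    /// pp.step(PreToken::Literal("task".into(), SyntaxKind::TaskKeyword), &mut post);
+    /// // A comment literal instead forces a newline after it.
+    /// pp.step(PreToken::Literal("# note".into(), SyntaxKind::Comment), &mut post);
+    /// ```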
+    pub fn step(&mut self, token: PreToken, stream: &mut TokenStream<PostToken>) {
+        match token {
+            PreToken::SectionSpacer => {
+                if self.0 != State::StartOfLine {
+                    self.newline(stream)
+                }
+
+                self.newline(stream);
+            }
+            PreToken::Literal(value, kind) => {
+                match self.0 {
+                    State::StartOfLine | State::MiddleOfLine => {
+                        stream.push(PostToken::Literal(value));
+                    }
+                }
+
+                if kind == SyntaxKind::Comment {
+                    self.newline(stream);
+                } else {
+                    stream.push(PostToken::Space);
+                    self.0 = State::MiddleOfLine;
+                }
+            }
+        }
+    }
+
+    /// Adds a newline to the stream and modifies the state accordingly.
+    fn newline(&mut self, stream: &mut TokenStream<PostToken>) {
+        stream.trim_end(&PostToken::Space);
+        stream.push(PostToken::Newline);
+        self.0 = State::StartOfLine;
+    }
+}
diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs
new file mode 100644
index 000000000..d9f8916c2
--- /dev/null
+++ b/wdl-format/src/token/pre.rs
@@ -0,0 +1,75 @@
+//! Tokens emitted during the formatting of particular elements.
+
+use wdl_ast::SyntaxKind;
+
+use crate::Token;
+use crate::TokenStream;
+
+/// A token that can be written by elements.
+///
+/// These are tokens that are intended to be written directly by elements to a
+/// [`TokenStream`](super::TokenStream) consisting of [`PreToken`]s. Note that
+/// this will be transformed into a [`TokenStream`](super::TokenStream) of
+/// [`PostToken`](super::PostToken)s by a
+/// [`Postprocessor`](super::Postprocessor) (authors of elements are never
+/// expected to write [`PostToken`](super::PostToken)s directly).
+#[derive(Debug, Eq, PartialEq)]
+pub enum PreToken {
+    /// A section spacer.
+    SectionSpacer,
+
+    /// Includes text literally in the output.
+    Literal(String, SyntaxKind),
+}
+
+impl PreToken {
+    /// Gets the [`SyntaxKind`] of the token if the token is a
+    /// [`PreToken::Literal`].
+    pub fn kind(&self) -> Option<&SyntaxKind> {
+        match self {
+            PreToken::Literal(_, kind) => Some(kind),
+            _ => None,
+        }
+    }
+}
+
+/// The line length to use when displaying pretokens.
+const DISPLAY_LINE_LENGTH: usize = 88;
+
+impl std::fmt::Display for PreToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PreToken::SectionSpacer => write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)),
+            PreToken::Literal(value, kind) => {
+                write!(
+                    f,
+                    "{:width$}<{:?}>",
+                    value,
+                    kind,
+                    width = DISPLAY_LINE_LENGTH
+                )
+            }
+        }
+    }
+}
+
+impl Token for PreToken {}
+
+impl TokenStream<PreToken> {
+    /// Inserts an element spacer to the stream.
+    pub fn section_spacer(&mut self) {
+        self.0.push(PreToken::SectionSpacer);
+    }
+
+    /// Pushes an AST token into the stream.
+    pub fn push_ast_token(&mut self, token: &wdl_ast::Token) {
+        let syntax = token.syntax();
+        let token = PreToken::Literal(syntax.text().to_owned(), syntax.kind());
+        self.0.push(token);
+    }
+
+    /// Gets an iterator of references to each token in the stream.
+    pub fn iter(&self) -> impl Iterator<Item = &PreToken> {
+        self.0.iter()
+    }
+}
diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs
new file mode 100644
index 000000000..fcbac2143
--- /dev/null
+++ b/wdl-format/src/v1.rs
@@ -0,0 +1,66 @@
+//! Formatting of WDL v1.x elements.
+
+use wdl_ast::SyntaxKind;
+
+pub mod task;
+pub mod workflow;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+use crate::exactly_one;
+
+/// Formats an [`Ast`](wdl_ast::Ast).
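+///
+/// A minimal end-to-end sketch (illustrative only, not compiled as a
+/// doc-test), mirroring how the formatter is driven elsewhere in this crate:
+///
+/// ```ignore
+/// let (document, diagnostics) = Document::parse("version 1.2\n\nworkflow w {}");
+/// assert!(diagnostics.is_empty());
+/// let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+/// let formatted = Formatter::default().format(&element)?;
+/// print!("{formatted}");
+/// ```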
+pub fn format_ast(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut versions) = children.remove(&SyntaxKind::VersionStatementNode) {
+        let version = exactly_one!(versions, "version statements");
+
+        // TODO(clay): improve this by removing the reference.
+        (&version).write(stream);
+    }
+
+    stream.section_spacer();
+
+    if let Some(tasks) = children.remove(&SyntaxKind::TaskDefinitionNode) {
+        for task in tasks {
+            (&task).write(stream);
+            stream.section_spacer();
+        }
+    }
+
+    if let Some(workflows) = children.remove(&SyntaxKind::WorkflowDefinitionNode) {
+        for workflow in workflows {
+            (&workflow).write(stream);
+            stream.section_spacer();
+        }
+    }
+
+    if !children.is_empty() {
+        todo!("unhandled children for AST: {:#?}", children.keys());
+    }
+}
+
+/// Formats a [`VersionStatement`](wdl_ast::VersionStatement).
+pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut keywords) = children.remove(&SyntaxKind::VersionKeyword) {
+        let keyword = exactly_one!(keywords, "`version` keywords");
+        (&keyword).write(stream);
+    }
+
+    if let Some(mut versions) = children.remove(&SyntaxKind::Version) {
+        let version = exactly_one!(versions, "versions");
+        (&version).write(stream);
+    }
+
+    if !children.is_empty() {
+        todo!(
+            "unhandled children for version statement: {:#?}",
+            children.keys()
+        );
+    }
+}
diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs
new file mode 100644
index 000000000..7450c1086
--- /dev/null
+++ b/wdl-format/src/v1/task.rs
@@ -0,0 +1,41 @@
+//! Formatting for tasks.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+use crate::exactly_one;
+
+/// Formats a [`TaskDefinition`](wdl_ast::v1::TaskDefinition).
+pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut keywords) = children.remove(&SyntaxKind::TaskKeyword) {
+        let keyword = exactly_one!(keywords, "task keywords");
+        (&keyword).write(stream);
+    }
+
+    if let Some(mut idents) = children.remove(&SyntaxKind::Ident) {
+        let ident = exactly_one!(idents, "idents");
+        (&ident).write(stream);
+    }
+
+    if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) {
+        let brace = exactly_one!(braces, "open braces");
+        (&brace).write(stream);
+    }
+
+    if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) {
+        let brace = exactly_one!(braces, "close braces");
+        (&brace).write(stream);
+    }
+
+    if !children.is_empty() {
+        todo!(
+            "unhandled children for task definition: {:#?}",
+            children.keys()
+        );
+    }
+}
diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs
new file mode 100644
index 000000000..488d2ac03
--- /dev/null
+++ b/wdl-format/src/v1/workflow.rs
@@ -0,0 +1,49 @@
+//! Formatting for workflows.
+
+pub mod call;
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+use crate::exactly_one;
+
+/// Formats a [`WorkflowDefinition`](wdl_ast::v1::WorkflowDefinition).
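+///
+/// Children are written in a fixed order: the `workflow` keyword, the name,
+/// the opening brace, any call statements, and the closing brace; any other
+/// child kind currently panics via `todo!()`.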
+pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut keywords) = children.remove(&SyntaxKind::WorkflowKeyword) {
+        let keyword = exactly_one!(keywords, "workflow keywords");
+        (&keyword).write(stream);
+    }
+
+    if let Some(mut idents) = children.remove(&SyntaxKind::Ident) {
+        let ident = exactly_one!(idents, "idents");
+        (&ident).write(stream);
+    }
+
+    if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) {
+        let brace = exactly_one!(braces, "open braces");
+        (&brace).write(stream);
+    }
+
+    if let Some(calls) = children.remove(&SyntaxKind::CallStatementNode) {
+        for call in calls {
+            (&call).write(stream);
+        }
+    }
+
+    if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) {
+        let brace = exactly_one!(braces, "close braces");
+        (&brace).write(stream);
+    }
+
+    if !children.is_empty() {
+        todo!(
+            "unhandled children for workflow definition: {:#?}",
+            children.keys()
+        );
+    }
+}
diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs
new file mode 100644
index 000000000..8cc06e5f1
--- /dev/null
+++ b/wdl-format/src/v1/workflow/call.rs
@@ -0,0 +1,58 @@
+//! Formatting for workflow calls.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+use crate::exactly_one;
+
+/// Formats a [`CallStatement`](wdl_ast::v1::CallStatement).
+pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut keywords) = children.remove(&SyntaxKind::CallKeyword) {
+        let keyword = exactly_one!(keywords, "call keywords");
+        (&keyword).write(stream);
+    }
+
+    if let Some(mut call_nodes) = children.remove(&SyntaxKind::CallTargetNode) {
+        let call_node = exactly_one!(call_nodes, "call target nodes");
+        (&call_node).write(stream);
+    }
+
+    if let Some(mut open_braces) = children.remove(&SyntaxKind::OpenBrace) {
+        let open_brace = exactly_one!(open_braces, "open braces");
+        (&open_brace).write(stream);
+    }
+
+    if let Some(mut close_braces) = children.remove(&SyntaxKind::CloseBrace) {
+        let close_brace = exactly_one!(close_braces, "close braces");
+        (&close_brace).write(stream);
+    }
+
+    if !children.is_empty() {
+        todo!(
+            "unhandled children for call statement: {:#?}",
+            children.keys()
+        );
+    }
+}
+
+/// Formats a [`CallTarget`](wdl_ast::v1::CallTarget).
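+///
+/// The call target is the name referenced after the `call` keyword (e.g.
+/// `foo` in `call foo {}`); only a bare identifier target is handled so far.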
+pub fn format_call_target(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children_by_kind();
+
+    if let Some(mut idents) = children.remove(&SyntaxKind::Ident) {
+        let ident = exactly_one!(idents, "idents");
+        (&ident).write(stream);
+    }
+
+    if !children.is_empty() {
+        todo!(
+            "unhandled children for call target: {:#?}",
+            children.keys()
+        );
+    }
+}
diff --git a/wdl-grammar/Cargo.toml b/wdl-grammar/Cargo.toml
index b83f50f37..c6f3ca113 100644
--- a/wdl-grammar/Cargo.toml
+++ b/wdl-grammar/Cargo.toml
@@ -11,8 +11,10 @@ repository = "https://github.com/stjude-rust-labs/wdl"
 documentation = "https://docs.rs/wdl-grammar"
 
 [dependencies]
+itertools = { workspace = true }
 logos = { workspace = true }
 rowan = { workspace = true }
+strum = { version = "0.26", features = ["derive"] }
 codespan-reporting = { workspace = true, optional = true }
 
 [dev-dependencies]
@@ -24,6 +26,9 @@ codespan-reporting = { workspace = true }
 [features]
codespan = ["dep:codespan-reporting"]
 
+[lints]
+workspace = true
+
 [[test]]
 name = "parsing"
 required-features = ["codespan"]
diff --git a/wdl-grammar/src/tree.rs b/wdl-grammar/src/tree.rs
index 34fd6e91c..bc473d6ec 100644
--- a/wdl-grammar/src/tree.rs
+++ b/wdl-grammar/src/tree.rs
@@ -3,10 +3,15 @@
 pub mod dive;
 
 use std::borrow::Cow;
+use std::collections::VecDeque;
 use std::fmt;
 
+use itertools::FoldWhile;
+use itertools::Itertools as _;
+use rowan::Direction;
 use rowan::GreenNodeBuilder;
 use rowan::GreenNodeData;
+use strum::VariantArray;
 
 use super::Diagnostic;
 use super::grammar;
@@ -22,7 +27,7 @@ use crate::parser::Parser;
 /// Tokens are terminal and represent any span of the source.
 ///
 /// This enumeration is a union of all supported WDL tokens and nodes.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, VariantArray)]
 #[repr(u16)]
 pub enum SyntaxKind {
     /// The token is unknown to WDL.
@@ -261,9 +266,9 @@ pub enum SyntaxKind {
     MetadataObjectNode,
     /// Represents a metadata array node.
     MetadataArrayNode,
-    /// Represents a literal integer node.
+    /// Represents a literal integer node.
     LiteralIntegerNode,
-    /// Represents a literal float node.
+    /// Represents a literal float node.
     LiteralFloatNode,
     /// Represents a literal boolean node.
     LiteralBooleanNode,
@@ -373,6 +378,23 @@ pub enum SyntaxKind {
     MAX,
 }
 
+impl SyntaxKind {
+    /// Returns whether the token is a symbolic [`SyntaxKind`].
+    ///
+    /// Generally speaking, symbolic [`SyntaxKind`]s have special meanings
+    /// during parsing—they are not real elements of the grammar but rather an
+    /// implementation detail.
+    pub fn is_symbolic(&self) -> bool {
+        matches!(
+            self,
+            SyntaxKind::Abandoned | SyntaxKind::Unknown | SyntaxKind::Unparsed | SyntaxKind::MAX
+        )
+    }
+}
+
+/// Every [`SyntaxKind`] variant.
+pub static ALL_SYNTAX_KIND: &[SyntaxKind] = SyntaxKind::VARIANTS;
+
 impl From<SyntaxKind> for rowan::SyntaxKind {
     fn from(kind: SyntaxKind) -> Self {
         rowan::SyntaxKind(kind as u16)
@@ -552,6 +574,11 @@ impl SyntaxKind {
             SyntaxKind::MAX => unreachable!(),
         }
     }
+
+    /// Returns whether the [`SyntaxKind`] is trivia.
+    pub fn is_trivia(&self) -> bool {
+        matches!(self, SyntaxKind::Whitespace | SyntaxKind::Comment)
+    }
 }
 
 /// Represents the Workflow Definition Language (WDL).
@@ -690,3 +717,326 @@ impl fmt::Debug for SyntaxTree {
         self.0.fmt(f)
     }
 }
+
+/// Gathers comments and blank lines from a [`SyntaxExt`].
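+///
+/// A sketch of the behavior (illustrative only, not compiled as a doc-test;
+/// `node` is assumed to implement [`SyntaxExt`]):
+///
+/// ```ignore
+/// // Given `# attached comment` on its own line directly above the node,
+/// // gathering in the `Prev` direction collects that comment; passing
+/// // `break_on_newline = true` instead stops at the first newline.
+/// let trivia = gather_trivia(&node, Direction::Prev, false);
+/// assert_eq!(trivia.as_ref(), vec!["# attached comment"]);
+/// ```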
+fn gather_trivia<T: SyntaxExt>(
+    source: &T,
+    direction: Direction,
+    break_on_newline: bool,
+) -> Box<[String]> {
+    let iter = source.siblings_with_tokens(direction);
+
+    /// Adds the text to the currently collecting buffer in the right place
+    /// depending on the direction we are traversing.
+    fn push_results(text: String, results: &mut VecDeque<String>, direction: &Direction) {
+        match direction {
+            Direction::Next => results.push_back(text),
+            Direction::Prev => results.push_front(text),
+        }
+    }
+
+    let comments = iter
+        .skip_while(|e| source.matches(e))
+        .take_while(|e| matches!(e.kind(), SyntaxKind::Comment | SyntaxKind::Whitespace))
+        .fold_while(VecDeque::new(), |mut results, e| {
+            match e.kind() {
+                SyntaxKind::Comment => {
+                    // Check if e is a comment on its own line.
+                    // If direction is 'Next' then we already know that the
+                    // comment is on its own line.
+                    if direction == Direction::Prev {
+                        if let Some(prev) = e.prev_sibling_or_token() {
+                            if prev.kind() == SyntaxKind::Whitespace {
+                                let newlines = prev
+                                    .clone()
+                                    .into_token()
+                                    .expect("whitespace should always be a token")
+                                    .to_string()
+                                    .chars()
+                                    .filter(|c| *c == '\n')
+                                    .count();
+
+                                // If there are newlines in 'prev' then we know
+                                // that the comment is on its own line.
+                                // The comment may still be on its own line if
+                                // 'prev' does not have newlines and nothing comes
+                                // before 'prev'.
+                                if newlines == 0 && prev.prev_sibling_or_token().is_some() {
+                                    return FoldWhile::Done(results);
+                                }
+                            } else {
+                                // There is something else on this line before the comment.
+                                return FoldWhile::Done(results);
+                            }
+                        }
+                    }
+
+                    let text = e
+                        .into_token()
+                        .expect("comment should always be a token")
+                        .to_string()
+                        .trim_end()
+                        .to_string();
+
+                    push_results(text, &mut results, &direction);
+                }
+                SyntaxKind::Whitespace => {
+                    let newlines = e
+                        .into_token()
+                        .expect("whitespace should always be a token")
+                        .to_string()
+                        .chars()
+                        .filter(|c| *c == '\n')
+                        .count();
+
+                    if break_on_newline && newlines > 0 {
+                        return FoldWhile::Done(results);
+                    }
+
+                    if newlines > 1 {
+                        push_results("\n".to_string(), &mut results, &direction)
+                    }
+                }
+                // SAFETY: we just filtered out any non-comment and
+                // non-whitespace nodes above, so this should never occur.
+                _ => unreachable!(),
+            }
+
+            FoldWhile::Continue(results)
+        })
+        .into_inner();
+
+    // NOTE: most of the time, this conversion will be O(1). Occasionally
+    // it will be O(n). No allocations will ever be done. Thus, the
+    // amortized cost of this is quite cheap.
+    Vec::from(comments).into_boxed_slice()
+}
+
+/// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and
+/// [`SyntaxElement`]s.
+pub trait SyntaxExt {
+    /// Returns whether `self` matches the provided element.
+    fn matches(&self, other: &SyntaxElement) -> bool;
+
+    /// Gets the siblings with tokens.
+    ///
+    /// **NOTE:** this is needed because Rowan does not encapsulate this
+    /// functionality in a trait. Once wrapped here, most of the functions
+    /// provided by this extension trait can just be provided, which simplifies
+    /// the code. Generally speaking, this should just defer to the underlying
+    /// `siblings_with_tokens` method for each type.
+    fn siblings_with_tokens(&self, direction: Direction)
+    -> Box<dyn Iterator<Item = SyntaxElement>>;
+
+    /// Returns all of the siblings _before_ the current element.
+    ///
+    /// The siblings are returned in the order they were parsed.
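+    ///
+    /// For example, for the workflow node in a document consisting of a
+    /// version statement followed by a task and then the workflow, the
+    /// preceding siblings are the version statement, the task definition,
+    /// and the whitespace between them, in parse order.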
+    fn preceding_siblings(&self) -> Box<[SyntaxElement]> {
+        let mut results = VecDeque::new();
+
+        self.siblings_with_tokens(Direction::Prev)
+            // NOTE: this `skip_while` is necessary because
+            // `siblings_with_tokens` returns the current node.
+            .skip_while(|e| self.matches(e))
+            .for_each(|e| results.push_front(e));
+
+        // NOTE: most of the time, this conversion will be O(1). Occasionally
+        // it will be O(n). No allocations will ever be done. Thus, the
+        // amortized cost of this is quite cheap.
+        Vec::from(results).into_boxed_slice()
+    }
+
+    /// Returns all of the siblings _after_ the current element.
+    ///
+    /// The siblings are returned in the order they were parsed.
+    fn succeeding_siblings(&self) -> Box<[SyntaxElement]> {
+        let mut results = Vec::new();
+
+        self.siblings_with_tokens(Direction::Next)
+            // NOTE: this `skip_while` is necessary because
+            // `siblings_with_tokens` returns the current node.
+            .skip_while(|e| self.matches(e))
+            .for_each(|e| results.push(e));
+
+        // NOTE: this should always be O(1) and never require any additional
+        // allocations.
+        results.into_boxed_slice()
+    }
+
+    /// Gets all elements that are adjacent to a particular element (not
+    /// including the element itself). This means in both the forward and
+    /// reverse direction.
+    ///
+    /// The siblings are returned in the order they were parsed.
+    fn adjacent(&self) -> Box<[SyntaxElement]> {
+        let mut results = Vec::from(self.preceding_siblings());
+        results.extend(self.succeeding_siblings().iter().cloned());
+
+        // NOTE: this should always be O(1) and never require any additional
+        // allocations.
+        results.into_boxed_slice()
+    }
+
+    /// Gets all of the preceding trivia (comments and blank lines) for an
+    /// element.
+    fn preceding_trivia(&self) -> Box<[String]>
+    where
+        Self: Sized,
+    {
+        gather_trivia(self, Direction::Prev, false)
+    }
+
+    /// Gets all of the succeeding comments for an element.
+    fn succeeding_comments(&self) -> Box<[String]>
+    where
+        Self: Sized,
+    {
+        gather_trivia(self, Direction::Next, false)
+    }
+
+    /// Get any inline comment directly following an element on the
+    /// same line.
+    fn inline_comment(&self) -> Option<String>
+    where
+        Self: Sized,
+    {
+        gather_trivia(self, Direction::Next, true)
+            // NOTE: at most, there can be one contiguous comment on a line.
+            .first()
+            .cloned()
+    }
+}
+
+impl SyntaxExt for SyntaxNode {
+    fn matches(&self, other: &SyntaxElement) -> bool {
+        other.as_node().map(|n| n == self).unwrap_or(false)
+    }
+
+    fn siblings_with_tokens(
+        &self,
+        direction: Direction,
+    ) -> Box<dyn Iterator<Item = SyntaxElement>> {
+        Box::new(self.siblings_with_tokens(direction))
+    }
+}
+
+impl SyntaxExt for SyntaxToken {
+    fn matches(&self, other: &SyntaxElement) -> bool {
+        other.as_token().map(|n| n == self).unwrap_or(false)
+    }
+
+    fn siblings_with_tokens(
+        &self,
+        direction: Direction,
+    ) -> Box<dyn Iterator<Item = SyntaxElement>> {
+        Box::new(self.siblings_with_tokens(direction))
+    }
+}
+
+impl SyntaxExt for SyntaxElement {
+    fn matches(&self, other: &SyntaxElement) -> bool {
+        self == other
+    }
+
+    fn siblings_with_tokens(
+        &self,
+        direction: Direction,
+    ) -> Box<dyn Iterator<Item = SyntaxElement>> {
+        match self {
+            SyntaxElement::Node(node) => Box::new(node.siblings_with_tokens(direction)),
+            SyntaxElement::Token(token) => Box::new(token.siblings_with_tokens(direction)),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::SyntaxTree;
+
+    #[test]
+    fn preceding_comments() {
+        let (tree, diagnostics) = SyntaxTree::parse(
+            "version 1.2
+
+# This comment should not be included
+task foo {} # This comment should not be included
+
+# Some
+# comments
+# are
+# long
+
+# Others are short
+
+# and, yet another
+workflow foo {} # This should not be collected.
+
+# This comment should not be included either.",
+        );
+
+        assert!(diagnostics.is_empty());
+
+        let workflow = tree.root().last_child().unwrap();
+        assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
+        assert_eq!(workflow.preceding_trivia().as_ref(), vec![
+            "\n",
+            "# Some",
+            "# comments",
+            "# are",
+            "# long",
+            "\n",
+            "# Others are short",
+            "\n",
+            "# and, yet another"
+        ]);
+    }
+
+    #[test]
+    fn succeeding_comments() {
+        let (tree, diagnostics) = SyntaxTree::parse(
+            "version 1.2
+
+# This comment should not be included
+task foo {}
+
+# This should not be collected.
+workflow foo {} # Here is a comment that should be collected.
+
+# This comment should be included too.",
+        );
+
+        assert!(diagnostics.is_empty());
+
+        let workflow = tree.root().last_child().unwrap();
+        assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
+        assert_eq!(workflow.succeeding_comments().as_ref(), vec![
+            "# Here is a comment that should be collected.",
+            "\n",
+            "# This comment should be included too."
+        ]);
+    }
+
+    #[test]
+    fn inline_comment() {
+        let (tree, diagnostics) = SyntaxTree::parse(
+            "version 1.2
+
+# This comment should not be included
+task foo {}
+
+# This should not be collected.
+workflow foo {} # Here is a comment that should be collected.
+
+# This comment should not be included either.",
+        );
+
+        assert!(diagnostics.is_empty());
+
+        let workflow = tree.root().last_child().unwrap();
+        assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
+        assert_eq!(
+            workflow.inline_comment().as_deref(),
+            Some("# Here is a comment that should be collected.")
+        );
+    }
+}
diff --git a/wdl-grammar/tests/parsing.rs b/wdl-grammar/tests/parsing.rs
index a90590fe9..8dda10c46 100644
--- a/wdl-grammar/tests/parsing.rs
+++ b/wdl-grammar/tests/parsing.rs
@@ -32,6 +32,7 @@ use rayon::prelude::*;
 use wdl_grammar::Diagnostic;
 use wdl_grammar::SyntaxTree;
 
+/// Finds tests for this package.
 fn find_tests() -> Vec<PathBuf> {
     // Check for filter arguments consisting of test names
     let mut filter = HashSet::new();
@@ -59,6 +60,7 @@ fn find_tests() -> Vec<PathBuf> {
     tests
 }
 
+/// Normalizes a path.
fn normalize(s: &str, is_error: bool) -> String { if is_error { // Normalize paths in any error messages @@ -69,6 +71,7 @@ fn normalize(s: &str, is_error: bool) -> String { s.replace("\r\n", "\n") } +/// Formats diagnostics. fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String { let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source); let mut buffer = Buffer::no_color(); @@ -85,6 +88,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String::from_utf8(buffer.into_inner()).expect("should be UTF-8") } +/// Compares a test result. fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> { let result = normalize(result, is_error); if env::var_os("BLESS").is_some() { @@ -116,6 +120,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), Strin Ok(()) } +/// Runs a test. fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { let path = test.join("source.wdl"); let source = std::fs::read_to_string(&path) diff --git a/wdl-lint/Cargo.toml b/wdl-lint/Cargo.toml index 289661d12..649eda3ec 100644 --- a/wdl-lint/Cargo.toml +++ b/wdl-lint/Cargo.toml @@ -26,6 +26,9 @@ colored = { workspace = true } [features] codespan = ["wdl-ast/codespan"] +[lints] +workspace = true + [[test]] name = "lints" required-features = ["codespan"] diff --git a/wdl-lint/src/rules/deprecated_placeholder_option.rs b/wdl-lint/src/rules/deprecated_placeholder_option.rs index 56951b3a7..3cd12c9bf 100644 --- a/wdl-lint/src/rules/deprecated_placeholder_option.rs +++ b/wdl-lint/src/rules/deprecated_placeholder_option.rs @@ -142,7 +142,7 @@ impl Visitor for DeprecatedPlaceholderOptionRule { _ => return, }; - if let Some(option) = placeholder.option() { + for option in placeholder.options() { let diagnostic = match option { PlaceholderOption::Sep(option) => deprecated_sep_placeholder_option(option.span()), PlaceholderOption::Default(option) => { diff --git a/wdl-lint/src/rules/disallowed_input_name.rs b/wdl-lint/src/rules/disallowed_input_name.rs index 588e53b5f..78d3e6eb1 100755 --- a/wdl-lint/src/rules/disallowed_input_name.rs +++ b/wdl-lint/src/rules/disallowed_input_name.rs @@ -1,6 +1,5 @@ //! A lint rule that disallows redundant input names. -use wdl_ast::AstNode; use wdl_ast::AstToken; use wdl_ast::Diagnostic; use wdl_ast::Diagnostics; diff --git a/wdl-lint/src/rules/disallowed_output_name.rs b/wdl-lint/src/rules/disallowed_output_name.rs index 099ffd7ba..6ff117981 100644 --- a/wdl-lint/src/rules/disallowed_output_name.rs +++ b/wdl-lint/src/rules/disallowed_output_name.rs @@ -1,6 +1,5 @@ //! A lint rule that disallows redundant output names. -use wdl_ast::AstNode; use wdl_ast::AstToken; use wdl_ast::Diagnostic; use wdl_ast::Diagnostics; diff --git a/wdl-lint/src/rules/double_quotes.rs b/wdl-lint/src/rules/double_quotes.rs index 6b0c7c1be..8e23752fe 100644 --- a/wdl-lint/src/rules/double_quotes.rs +++ b/wdl-lint/src/rules/double_quotes.rs @@ -1,6 +1,5 @@ //! A lint rule for using double quoted strings. -use wdl_ast::AstNode; use wdl_ast::AstNodeExt; use wdl_ast::Diagnostic; use wdl_ast::Diagnostics; diff --git a/wdl-lint/src/rules/ending_newline.rs b/wdl-lint/src/rules/ending_newline.rs index 21813b62e..a05f93283 100644 --- a/wdl-lint/src/rules/ending_newline.rs +++ b/wdl-lint/src/rules/ending_newline.rs @@ -1,7 +1,6 @@ //! A lint rule for newlines at the end of the document. 
 use wdl_ast::Ast;
-use wdl_ast::AstNode;
 use wdl_ast::Diagnostic;
 use wdl_ast::Diagnostics;
 use wdl_ast::Document;
diff --git a/wdl-lint/src/rules/section_order.rs b/wdl-lint/src/rules/section_order.rs
index f4f13bb61..d1e6adcb7 100644
--- a/wdl-lint/src/rules/section_order.rs
+++ b/wdl-lint/src/rules/section_order.rs
@@ -166,7 +166,7 @@ impl Visitor for SectionOrderingRule {
                     TaskItem::Requirements(_) if encountered <= State::Requirements => {
                         encountered = State::Requirements;
                     }
-                    TaskItem::Hints(_) if encountered <= State::Hints => {
+                    TaskItem::TaskHints(_) if encountered <= State::Hints => {
                         encountered = State::Hints;
                     }
                     _ => {
@@ -218,7 +218,7 @@ impl Visitor for SectionOrderingRule {
                     WorkflowItem::Output(_) if encountered <= State::Output => {
                         encountered = State::Output;
                     }
-                    WorkflowItem::Hints(_) if encountered <= State::Hints => {
+                    WorkflowItem::TaskHints(_) if encountered <= State::Hints => {
                         encountered = State::Hints;
                     }
                     _ => {
diff --git a/wdl-lint/tests/lints.rs b/wdl-lint/tests/lints.rs
index 20a42574e..6ada6439e 100644
--- a/wdl-lint/tests/lints.rs
+++ b/wdl-lint/tests/lints.rs
@@ -33,6 +33,7 @@ use wdl_ast::Document;
 use wdl_ast::Validator;
 use wdl_lint::LintVisitor;
 
+/// Finds tests for this package.
 fn find_tests() -> Vec<PathBuf> {
     // Check for filter arguments consisting of test names
     let mut filter = HashSet::new();
@@ -60,6 +61,7 @@ fn find_tests() -> Vec<PathBuf> {
     tests
 }
 
+/// Normalizes a path.
 fn normalize(s: &str, is_error: bool) -> String {
     if is_error {
         // Normalize paths in any error messages
@@ -70,6 +72,7 @@ fn normalize(s: &str, is_error: bool) -> String {
     s.replace("\r\n", "\n")
 }
 
+/// Formats diagnostics.
 fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
     let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
     let mut buffer = Buffer::no_color();
@@ -86,6 +89,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) ->
     String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
 }
 
+/// Compares a test result.
 fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> {
     let result = normalize(result, is_error);
     if env::var_os("BLESS").is_some() {
@@ -117,6 +121,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), Strin
     Ok(())
 }
 
+/// Runs a test.
fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { let path = test.join("source.wdl"); let source = std::fs::read_to_string(&path).map_err(|e| { @@ -154,6 +159,7 @@ fn main() { let ntests = AtomicUsize::new(0); + #[allow(clippy::missing_docs_in_private_items)] fn inner<'a>(test: &'a Path, ntests: &AtomicUsize) -> Option<(&'a str, String)> { let test_name = test.file_stem().and_then(OsStr::to_str).unwrap(); match std::panic::catch_unwind(|| { diff --git a/wdl-lsp/Cargo.toml b/wdl-lsp/Cargo.toml index 9193e4e2c..caf37c8e7 100644 --- a/wdl-lsp/Cargo.toml +++ b/wdl-lsp/Cargo.toml @@ -24,3 +24,6 @@ line-index = { workspace = true } serde_json = { workspace = true } indexmap = { workspace = true } uuid = { workspace = true, features = ["v4"] } + +[lints] +workspace = true diff --git a/wdl/Cargo.toml b/wdl/Cargo.toml index 08c3f25fc..4d63fed23 100644 --- a/wdl/Cargo.toml +++ b/wdl/Cargo.toml @@ -17,6 +17,7 @@ wdl-ast = { path = "../wdl-ast", version = "0.7.1", optional = true } wdl-lint = { path = "../wdl-lint", version = "0.6.0", optional = true } wdl-analysis = { path = "../wdl-analysis", version = "0.3.0", optional = true } wdl-lsp = { path = "../wdl-lsp", version = "0.3.0", optional = true } +wdl-format = { path = "../wdl-format", version = "0.1.0", optional = true } tracing-subscriber = { workspace = true, optional = true } clap = { workspace = true, optional = true } anyhow = { workspace = true, optional = true } @@ -34,9 +35,10 @@ anyhow = { workspace = true } codespan-reporting = { workspace = true } [features] -default = ["analysis", "ast", "grammar", "lint"] +default = ["ast", "grammar", "lint"] analysis = ["dep:wdl-analysis"] ast = ["dep:wdl-ast"] +format = ["dep:wdl-format"] grammar = ["dep:wdl-grammar"] lint = ["dep:wdl-lint"] lsp = ["dep:wdl-lsp"] @@ -56,6 +58,9 @@ cli = [ "dep:tracing", ] +[lints] +workspace = true + [[example]] name = "explore" required-features = ["codespan"] diff --git a/wdl/examples/explore.rs b/wdl/examples/explore.rs index 2a3600491..3199cfacc 100644 --- a/wdl/examples/explore.rs +++ b/wdl/examples/explore.rs @@ -39,6 +39,7 @@ struct Args { path: PathBuf, } +/// Emits diagnostics. fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Result<()> { let file = SimpleFile::new(path.to_str().context("path should be UTF-8")?, source); let mut stream = StandardStream::stdout(if std::io::stdout().is_terminal() { @@ -59,6 +60,7 @@ fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Re Ok(()) } +/// The main function. pub fn main() -> Result<()> { let args = Args::parse(); let source = read_to_string(&args.path).with_context(|| { @@ -112,6 +114,7 @@ pub fn main() -> Result<()> { Ok(()) } +/// Explores metadata. fn explore_metadata(metadata: &MetadataSection) { for item in metadata.items() { let value = item.value().syntax().text().to_string(); @@ -123,6 +126,7 @@ fn explore_metadata(metadata: &MetadataSection) { } } +/// Explores an input. fn explore_input(input: &InputSection) { for decl in input.declarations() { println!( @@ -133,6 +137,7 @@ fn explore_input(input: &InputSection) { } } +/// Explores an output. fn explore_output(output: &OutputSection) { for decl in output.declarations() { println!( diff --git a/wdl/examples/parse.rs b/wdl/examples/parse.rs index 4e698c57d..1fef87477 100644 --- a/wdl/examples/parse.rs +++ b/wdl/examples/parse.rs @@ -25,6 +25,7 @@ struct Args { path: PathBuf, } +/// Emits diagnostics. 
 fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Result<()> {
     let file = SimpleFile::new(path.to_str().context("path should be UTF-8")?, source);
     let mut stream = StandardStream::stdout(if std::io::stdout().is_terminal() {
@@ -45,6 +46,7 @@ fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Re
     Ok(())
 }
 
+/// The main function.
 pub fn main() -> Result<()> {
     let args = Args::parse();
     let source = read_to_string(&args.path).with_context(|| {
diff --git a/wdl/src/bin/wdl.rs b/wdl/src/bin/wdl.rs
index d6e88a4f7..4810606f2 100644
--- a/wdl/src/bin/wdl.rs
+++ b/wdl/src/bin/wdl.rs
@@ -1,3 +1,8 @@
+//! The `wdl` command line tool.
+//!
+//! If you're here and not a developer of the `wdl` family of crates, you're
+//! probably looking for
+//! [Sprocket](https://github.com/stjude-rust-labs/sprocket) instead.
 use std::borrow::Cow;
 use std::fs;
 use std::io::IsTerminal;
@@ -29,6 +34,9 @@ use wdl::ast::Validator;
 use wdl::lint::LintVisitor;
 use wdl_analysis::AnalysisResult;
 use wdl_analysis::Analyzer;
+use wdl_ast::Node;
+use wdl_format::Formatter;
+use wdl_format::element::node::AstNodeFormatExt as _;
 
 /// Emits the given diagnostics to the output stream.
 ///
@@ -55,6 +63,7 @@ fn emit_diagnostics(path: &str, source: &str, diagnostics: &[Diagnostic]) -> Res
     Ok(())
 }
 
+/// Analyzes a path.
 async fn analyze(path: PathBuf, lint: bool) -> Result<Vec<AnalysisResult>> {
     let bar = ProgressBar::new(0);
     bar.set_style(
@@ -149,6 +158,7 @@ pub struct ParseCommand {
 }
 
 impl ParseCommand {
+    /// Executes the `parse` subcommand.
     async fn exec(self) -> Result<()> {
         let source = read_source(&self.path)?;
         let (document, diagnostics) = Document::parse(&source);
@@ -171,6 +181,7 @@ pub struct CheckCommand {
 }
 
 impl CheckCommand {
+    /// Executes the `check` subcommand.
     async fn exec(self) -> Result<()> {
         analyze(self.path, false).await?;
         Ok(())
@@ -187,6 +198,7 @@ pub struct LintCommand {
 }
 
 impl LintCommand {
+    /// Executes the `lint` subcommand.
     async fn exec(self) -> Result<()> {
         let source = read_source(&self.path)?;
         let (document, diagnostics) = Document::parse(&source);
@@ -230,6 +242,7 @@ pub struct AnalyzeCommand {
 }
 
 impl AnalyzeCommand {
+    /// Executes the `analyze` subcommand.
     async fn exec(self) -> Result<()> {
         let results = analyze(self.path, self.lint).await?;
         println!("{:#?}", results);
@@ -237,6 +250,44 @@ impl AnalyzeCommand {
     }
 }
 
+/// Formats a WDL source file.
+#[derive(Args)]
+#[clap(disable_version_flag = true)]
+pub struct FormatCommand {
+    /// The path to the source WDL file.
+    #[clap(value_name = "PATH")]
+    pub path: PathBuf,
+}
+
+impl FormatCommand {
+    /// Executes the `format` subcommand.
+    async fn exec(self) -> Result<()> {
+        let source = read_source(&self.path)?;
+
+        let (document, diagnostics) = Document::parse(&source);
+
+        if !diagnostics.is_empty() {
+            emit_diagnostics(&self.path.to_string_lossy(), &source, &diagnostics)?;
+
+            bail!(
+                "aborting due to previous {count} diagnostic{s}",
+                count = diagnostics.len(),
+                s = if diagnostics.len() == 1 { "" } else { "s" }
+            );
+        }
+
+        let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatter = Formatter::default();
+
+        match formatter.format(&document) {
+            Ok(formatted) => print!("{formatted}"),
+            Err(err) => bail!(err),
+        };
+
+        Ok(())
+    }
+}
+
 /// A tool for parsing, validating, and linting WDL source code.
 ///
 /// This command line tool is intended as an entrypoint to work with and develop
@@ -254,19 +306,31 @@ impl AnalyzeCommand {
     arg_required_else_help = true
 )]
 struct App {
+    /// The subcommand to use.
     #[command(subcommand)]
     command: Command,
 
+    /// The verbosity flags.
     #[command(flatten)]
     verbose: Verbosity,
 }
 
 #[derive(Subcommand)]
 enum Command {
+    /// Parses a WDL file.
     Parse(ParseCommand),
+
+    /// Checks a WDL file.
    Check(CheckCommand),
+
+    /// Lints a WDL file.
     Lint(LintCommand),
+
+    /// Analyzes a WDL workspace.
     Analyze(AnalyzeCommand),
+
+    /// Formats a WDL file.
+    Format(FormatCommand),
 }
 
 #[tokio::main]
@@ -285,6 +349,7 @@ async fn main() -> Result<()> {
         Command::Check(cmd) => cmd.exec().await,
         Command::Lint(cmd) => cmd.exec().await,
         Command::Analyze(cmd) => cmd.exec().await,
+        Command::Format(cmd) => cmd.exec().await,
     } {
         eprintln!(
             "{error}: {e:?}",
diff --git a/wdl/src/lib.rs b/wdl/src/lib.rs
index 51376ea7a..e5d91c498 100644
--- a/wdl/src/lib.rs
+++ b/wdl/src/lib.rs
@@ -78,6 +78,9 @@ pub use wdl_analysis as analysis;
 #[cfg(feature = "ast")]
 #[doc(inline)]
 pub use wdl_ast as ast;
+#[cfg(feature = "format")]
+#[doc(inline)]
+pub use wdl_format as format;
 #[cfg(feature = "grammar")]
 #[doc(inline)]
 pub use wdl_grammar as grammar;

From 53b883e7dcf8500ee8da9d724381eef23aa3f55a Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Fri, 4 Oct 2024 14:48:00 -0400
Subject: [PATCH 02/60] tests: format framework (everything panics)

---
 wdl-analysis/src/stdlib.rs                    | 1702 +++++----
 wdl-format/Cargo.toml                         |   13 +-
 wdl-format/tests/format.rs                    |  211 ++
 .../ENCODE-DCC_chip-seq-pipeline/LICENSE.txt  |   25 +
 .../ENCODE-DCC_chip-seq-pipeline/source.wdl   | 3296 +++++++++++++++++
 .../format/clays_complex_script/source.wdl    |  165 +
 .../format/complex_meta_and_calls/source.wdl  |  106 +
 .../imports_with_both_comments/source.wdl     |   23 +
 .../imports_with_inline_comments/source.wdl   |   12 +
 .../imports_with_no_comments/source.wdl       |    7 +
 .../source.wdl                                |   23 +
 .../tests/format/interrupt_example/source.wdl |   10 +
 .../tests/format/seaseq-case/LICENSE.txt      |  205 +
 .../tests/format/seaseq-case/source.wdl       |  898 +++++
 14 files changed, 5843 insertions(+), 853 deletions(-)
 create mode 100644 wdl-format/tests/format.rs
 create mode 100644 wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
 create mode 100644 wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
 create mode 100644 wdl-format/tests/format/clays_complex_script/source.wdl
 create mode 100644 wdl-format/tests/format/complex_meta_and_calls/source.wdl
 create mode 100644 wdl-format/tests/format/imports_with_both_comments/source.wdl
 create mode 100644 wdl-format/tests/format/imports_with_inline_comments/source.wdl
 create mode 100644 wdl-format/tests/format/imports_with_no_comments/source.wdl
 create mode 100644 wdl-format/tests/format/imports_with_preceding_comments/source.wdl
 create mode 100644 wdl-format/tests/format/interrupt_example/source.wdl
 create mode 100644 wdl-format/tests/format/seaseq-case/LICENSE.txt
 create mode 100644 wdl-format/tests/format/seaseq-case/source.wdl

diff --git a/wdl-analysis/src/stdlib.rs b/wdl-analysis/src/stdlib.rs
index 4508bcbd9..e7baf6b71 100644
--- a/wdl-analysis/src/stdlib.rs
+++ b/wdl-analysis/src/stdlib.rs
@@ -7,8 +7,8 @@ use std::sync::LazyLock;
 
 use indexmap::IndexMap;
 use indexmap::IndexSet;
-use wdl_ast::version::V1;
 use wdl_ast::SupportedVersion;
+use wdl_ast::version::V1;
 
 use crate::types::ArrayType;
 use crate::types::Coercible;
@@ -1441,57 +1441,62 @@ pub static STDLIB: LazyLock<StandardLibrary> = LazyLock::new(|| {
    let mut
functions = IndexMap::new(); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#floor - assert!(functions - .insert( - "floor", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), + assert!( + functions + .insert( + "floor", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#ceil - assert!(functions - .insert( - "ceil", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), + assert!( + functions + .insert( + "ceil", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#round - assert!(functions - .insert( - "round", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Float) - .ret(PrimitiveTypeKind::Integer) - .build(), + assert!( + functions + .insert( + "round", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Float) + .ret(PrimitiveTypeKind::Integer) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#min - assert!(functions - .insert( - "min", - PolymorphicFunction::new( - SupportedVersion::V1(V1::One), - vec![ + assert!( + functions + .insert( + "min", + PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::Integer) .parameter(PrimitiveTypeKind::Integer) @@ -1512,19 +1517,18 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::Float) .ret(PrimitiveTypeKind::Float) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#max - assert!(functions - .insert( - "max", - PolymorphicFunction::new( - SupportedVersion::V1(V1::One), - vec![ + assert!( + functions + .insert( + "max", + PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::Integer) .parameter(PrimitiveTypeKind::Integer) @@ -1545,68 +1549,73 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::Float) .ret(PrimitiveTypeKind::Float) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-find - assert!(functions - .insert( - "find", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveType::optional(PrimitiveTypeKind::String)) - .build(), + assert!( + functions + .insert( + "find", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + 
.ret(PrimitiveType::optional(PrimitiveTypeKind::String)) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-matches - assert!(functions - .insert( - "matches", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveTypeKind::Boolean) - .build(), + assert!( + functions + .insert( + "matches", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .ret(PrimitiveTypeKind::Boolean) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#sub - assert!(functions - .insert( - "sub", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .parameter(PrimitiveTypeKind::String) - .ret(PrimitiveTypeKind::String) - .build(), + assert!( + functions + .insert( + "sub", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .parameter(PrimitiveTypeKind::String) + .ret(PrimitiveTypeKind::String) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#basename - assert!(functions - .insert( - "basename", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "basename", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .required(1) .parameter(PrimitiveTypeKind::File) @@ -1629,19 +1638,18 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::String) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-join_paths - assert!(functions - .insert( - "join_paths", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Two), - vec![ + assert!( + functions + .insert( + "join_paths", + PolymorphicFunction::new(SupportedVersion::V1(V1::Two), vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::File) .parameter(PrimitiveTypeKind::String) @@ -1656,34 +1664,35 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string_non_empty) .ret(PrimitiveTypeKind::File) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#glob - assert!(functions - .insert( - "glob", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::String) - .ret(array_file) - .build(), + assert!( + functions + .insert( + "glob", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::String) + .ret(array_file) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#size - assert!(functions - .insert( - "size", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "size", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ 
FunctionSignature::builder() .required(1) .parameter(PrimitiveType::optional(PrimitiveTypeKind::File)) @@ -1713,137 +1722,152 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::Float) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#stdout - assert!(functions - .insert( - "stdout", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .ret(PrimitiveTypeKind::File) - .build(), + assert!( + functions + .insert( + "stdout", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .ret(PrimitiveTypeKind::File) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#stderr - assert!(functions - .insert( - "stderr", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .ret(PrimitiveTypeKind::File) - .build(), + assert!( + functions + .insert( + "stderr", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .ret(PrimitiveTypeKind::File) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_string - assert!(functions - .insert( - "read_string", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::String) - .build(), + assert!( + functions + .insert( + "read_string", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::String) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_int - assert!(functions - .insert( - "read_int", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Integer) - .build(), + assert!( + functions + .insert( + "read_int", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::Integer) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_float - assert!(functions - .insert( - "read_float", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Float) - .build(), + assert!( + functions + .insert( + "read_float", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(PrimitiveTypeKind::Float) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_boolean - assert!(functions - .insert( - "read_boolean", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(PrimitiveTypeKind::Boolean) - .build(), + assert!( + functions + .insert( + "read_boolean", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + 
.ret(PrimitiveTypeKind::Boolean) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_lines - assert!(functions - .insert( - "read_lines", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(array_string) - .build(), + assert!( + functions + .insert( + "read_lines", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(array_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_lines - assert!(functions - .insert( - "write_lines", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(array_string) - .ret(PrimitiveTypeKind::File) - .build(), + assert!( + functions + .insert( + "write_lines", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(array_string) + .ret(PrimitiveTypeKind::File) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_tsv - assert!(functions - .insert( - "read_tsv", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "read_tsv", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .parameter(PrimitiveTypeKind::File) .ret(array_array_string) @@ -1859,19 +1883,18 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string) .ret(array_object) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_tsv - assert!(functions - .insert( - "write_tsv", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "write_tsv", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .parameter(array_array_string) .ret(PrimitiveTypeKind::File) @@ -1894,110 +1917,121 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string) .ret(PrimitiveTypeKind::File) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_map - assert!(functions - .insert( - "read_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(map_string_string) - .build(), + assert!( + functions + .insert( + "read_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(map_string_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_map - assert!(functions - .insert( - "write_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(map_string_string) - .ret(PrimitiveTypeKind::File) - .build(), + assert!( + functions + .insert( + "write_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(map_string_string) + .ret(PrimitiveTypeKind::File) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // 
https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_json - assert!(functions - .insert( - "read_json", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(Type::Union) - .build(), + assert!( + functions + .insert( + "read_json", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(Type::Union) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_json - assert!(functions - .insert( - "write_json", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", JsonSerializableConstraint) - .parameter(GenericType::Parameter("X")) - .ret(PrimitiveTypeKind::File) - .build(), + assert!( + functions + .insert( + "write_json", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", JsonSerializableConstraint) + .parameter(GenericType::Parameter("X")) + .ret(PrimitiveTypeKind::File) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_object - assert!(functions - .insert( - "read_object", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(Type::Object) - .build(), + assert!( + functions + .insert( + "read_object", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(Type::Object) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#read_objects - assert!(functions - .insert( - "read_objects", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::File) - .ret(array_object) - .build(), + assert!( + functions + .insert( + "read_objects", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::File) + .ret(array_object) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_object - assert!(functions - .insert( - "write_object", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "write_object", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .parameter(Type::Object) .ret(PrimitiveTypeKind::File) @@ -2007,19 +2041,18 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericType::Parameter("S")) .ret(PrimitiveTypeKind::File) .build(), - ], + ],) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#write_objects - assert!(functions - .insert( - "write_objects", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "write_objects", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .parameter(array_object) .ret(PrimitiveTypeKind::File) @@ -2029,256 +2062,281 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericArrayType::new(GenericType::Parameter("S"))) .ret(PrimitiveTypeKind::File) .build(), - ], + ],) + 
.into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#prefix - assert!(functions - .insert( - "prefix", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), + assert!( + functions + .insert( + "prefix", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#suffix - assert!(functions - .insert( - "suffix", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), + assert!( + functions + .insert( + "suffix", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#quote - assert!(functions - .insert( - "quote", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), + assert!( + functions + .insert( + "quote", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#squote - assert!(functions - .insert( - "squote", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(array_string) - .build(), + assert!( + functions + .insert( + "squote", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(array_string) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#sep - assert!(functions - .insert( - "sep", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("P", RequiredPrimitiveTypeConstraint) - .parameter(PrimitiveTypeKind::String) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .ret(PrimitiveTypeKind::String) - .build(), + assert!( + functions + .insert( + "sep", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + 
FunctionSignature::builder() + .type_parameter("P", RequiredPrimitiveTypeConstraint) + .parameter(PrimitiveTypeKind::String) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .ret(PrimitiveTypeKind::String) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#range - assert!(functions - .insert( - "range", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .parameter(PrimitiveTypeKind::Integer) - .ret(array_int) - .build(), + assert!( + functions + .insert( + "range", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .parameter(PrimitiveTypeKind::Integer) + .ret(array_int) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#transpose - assert!(functions - .insert( - "transpose", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .ret(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .build(), + assert!( + functions + .insert( + "transpose", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .parameter(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .ret(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#cross - assert!(functions - .insert( - "cross", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - ))) - .build(), + assert!( + functions + .insert( + "cross", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) + .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#zip - assert!(functions - .insert( - "zip", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - ))) - .build(), + assert!( + functions + .insert( + "zip", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(GenericArrayType::new(GenericType::Parameter("Y"))) 
+ .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#unzip - assert!(functions - .insert( - "unzip", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .any_type_parameter("X") - .any_type_parameter("Y") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("X"), - GenericType::Parameter("Y"), - ))) - .ret(GenericPairType::new( - GenericArrayType::new(GenericType::Parameter("X")), - GenericArrayType::new(GenericType::Parameter("Y")), - )) - .build(), + assert!( + functions + .insert( + "unzip", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .any_type_parameter("X") + .any_type_parameter("Y") + .parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("X"), + GenericType::Parameter("Y"), + ))) + .ret(GenericPairType::new( + GenericArrayType::new(GenericType::Parameter("X")), + GenericArrayType::new(GenericType::Parameter("Y")), + )) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-contains - assert!(functions - .insert( - "contains", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .type_parameter("P", AnyPrimitiveTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("P"))) - .parameter(GenericType::Parameter("P")) - .ret(PrimitiveTypeKind::Boolean) - .build(), + assert!( + functions + .insert( + "contains", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .type_parameter("P", AnyPrimitiveTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("P"))) + .parameter(GenericType::Parameter("P")) + .ret(PrimitiveTypeKind::Boolean) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-chunk - assert!(functions - .insert( - "chunk", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .parameter(PrimitiveTypeKind::Integer) - .ret(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X"), - ))) - .build(), + assert!( + functions + .insert( + "chunk", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .any_type_parameter("X") + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .parameter(PrimitiveTypeKind::Integer) + .ret(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X"), + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#flatten - assert!(functions - .insert( - "flatten", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .any_type_parameter("X") - .parameter(GenericArrayType::new(GenericArrayType::new( - GenericType::Parameter("X") - ))) - .ret(GenericArrayType::new(GenericType::Parameter("X"))) - .build(), + assert!( + functions + .insert( + "flatten", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .any_type_parameter("X") + 
.parameter(GenericArrayType::new(GenericArrayType::new( + GenericType::Parameter("X") + ))) + .ret(GenericArrayType::new(GenericType::Parameter("X"))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#select_first - assert!(functions - .insert( - "select_first", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "select_first", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .type_parameter("X", OptionalTypeConstraint) .parameter(GenericArrayType::non_empty(GenericType::Parameter("X"))) @@ -2291,83 +2349,88 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(GenericType::UnqualifiedParameter("X")) .ret(GenericType::UnqualifiedParameter("X")) .build(), - ] + ]) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#select_all - assert!(functions - .insert( - "select_all", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", OptionalTypeConstraint) - .parameter(GenericArrayType::new(GenericType::Parameter("X"))) - .ret(GenericArrayType::new(GenericType::UnqualifiedParameter( - "X" - ))) - .build(), + assert!( + functions + .insert( + "select_all", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", OptionalTypeConstraint) + .parameter(GenericArrayType::new(GenericType::Parameter("X"))) + .ret(GenericArrayType::new(GenericType::UnqualifiedParameter( + "X" + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#as_pairs - assert!(functions - .insert( - "as_pairs", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .ret(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .build(), + assert!( + functions + .insert( + "as_pairs", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .ret(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#as_map - assert!(functions - .insert( - "as_map", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .ret(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .build(), + assert!( + functions + .insert( + "as_map", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + 
.parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .ret(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#keys - assert!(functions - .insert( - "keys", - PolymorphicFunction::new( - SupportedVersion::V1(V1::One), - vec![ + assert!( + functions + .insert( + "keys", + PolymorphicFunction::new(SupportedVersion::V1(V1::One), vec![ FunctionSignature::builder() .type_parameter("K", RequiredPrimitiveTypeConstraint) .any_type_parameter("V") @@ -2386,19 +2449,18 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(Type::Object) .ret(array_string) .build(), - ] + ]) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#contains_key - assert!(functions - .insert( - "contains_key", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Two), - vec![ + assert!( + functions + .insert( + "contains_key", + PolymorphicFunction::new(SupportedVersion::V1(V1::Two), vec![ FunctionSignature::builder() .type_parameter("K", RequiredPrimitiveTypeConstraint) .any_type_parameter("V") @@ -2434,78 +2496,83 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(array_string) .ret(PrimitiveTypeKind::Boolean) .build(), - ] + ]) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#-values - assert!(functions - .insert( - "values", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Two), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericMapType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - )) - .ret(GenericArrayType::new(GenericType::Parameter("V"))) - .build(), + assert!( + functions + .insert( + "values", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Two), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericMapType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + )) + .ret(GenericArrayType::new(GenericType::Parameter("V"))) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#collect_by_key - assert!(functions - .insert( - "collect_by_key", - MonomorphicFunction::new( - SupportedVersion::V1(V1::One), - FunctionSignature::builder() - .type_parameter("K", RequiredPrimitiveTypeConstraint) - .any_type_parameter("V") - .parameter(GenericArrayType::new(GenericPairType::new( - GenericType::Parameter("K"), - GenericType::Parameter("V") - ))) - .ret(GenericMapType::new( - GenericType::Parameter("K"), - GenericArrayType::new(GenericType::Parameter("V")) - )) - .build(), + assert!( + functions + .insert( + "collect_by_key", + MonomorphicFunction::new( + SupportedVersion::V1(V1::One), + FunctionSignature::builder() + .type_parameter("K", RequiredPrimitiveTypeConstraint) + .any_type_parameter("V") + .parameter(GenericArrayType::new(GenericPairType::new( + GenericType::Parameter("K"), + GenericType::Parameter("V") + ))) + .ret(GenericMapType::new( + GenericType::Parameter("K"), + GenericArrayType::new(GenericType::Parameter("V")) + )) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // 
https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#defined - assert!(functions - .insert( - "defined", - MonomorphicFunction::new( - SupportedVersion::V1(V1::Zero), - FunctionSignature::builder() - .type_parameter("X", OptionalTypeConstraint) - .parameter(GenericType::Parameter("X")) - .ret(PrimitiveTypeKind::Boolean) - .build(), + assert!( + functions + .insert( + "defined", + MonomorphicFunction::new( + SupportedVersion::V1(V1::Zero), + FunctionSignature::builder() + .type_parameter("X", OptionalTypeConstraint) + .parameter(GenericType::Parameter("X")) + .ret(PrimitiveTypeKind::Boolean) + .build(), + ) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); // https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#length - assert!(functions - .insert( - "length", - PolymorphicFunction::new( - SupportedVersion::V1(V1::Zero), - vec![ + assert!( + functions + .insert( + "length", + PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![ FunctionSignature::builder() .any_type_parameter("X") .parameter(GenericArrayType::new(GenericType::Parameter("X"))) @@ -2528,11 +2595,11 @@ pub static STDLIB: LazyLock = LazyLock::new(|| { .parameter(PrimitiveTypeKind::String) .ret(PrimitiveTypeKind::Integer) .build(), - ] + ]) + .into(), ) - .into(), - ) - .is_none()); + .is_none() + ); StandardLibrary { types, @@ -2672,13 +2739,10 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + ]) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(1)); @@ -2686,13 +2750,10 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::String.into()]) .expect_err("bind should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Float`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Float`".into() + }); // Check for Union (i.e. indeterminate) let ty = f @@ -2723,13 +2784,10 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + ]) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(1)); @@ -2737,13 +2795,10 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::String.into()]) .expect_err("bind should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Map[K, V]` where `K`: any required primitive type".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Map[K, V]` where `K`: any required primitive type".into() + }); // Check for Union (i.e. 
indeterminate) let ty = f @@ -2770,13 +2825,10 @@ mod test { PrimitiveTypeKind::Boolean, )); let e = f.bind(&mut types, &[ty]).expect_err("bind should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Map[K, Boolean]` where `K`: any required primitive type".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Map[K, Boolean]` where `K`: any required primitive type".into() + }); } #[test] @@ -2791,13 +2843,10 @@ mod test { let e = f .bind(&mut types, &[array_string]) .expect_err("bind should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Array[X]` where `X`: any optional type".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Array[X]` where `X`: any optional type".into() + }); // Check for a Array[String?] -> Array[String] let array_optional_string = types.add_array(ArrayType::new(PrimitiveType::optional( @@ -2836,136 +2885,97 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(2)); let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - PrimitiveTypeKind::File.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + PrimitiveTypeKind::File.into(), + ]) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(2)); // Check for `(Int, Int)` let ty = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::Integer.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Integer.into(), + PrimitiveTypeKind::Integer.into(), + ]) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Int"); // Check for `(Int, Float)` let ty = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::Float.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Integer.into(), + PrimitiveTypeKind::Float.into(), + ]) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(Float, Int)` let ty = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::Integer.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::Integer.into(), + ]) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(Float, Float)` let ty = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::Float.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::Float.into(), + ]) .expect("binding should succeed"); assert_eq!(ty.display(&types).to_string(), "Float"); // Check for `(String, Int)` let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Integer.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Integer.into(), + ]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Int` or `Float`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Int` or `Float`".into() + }); // Check for `(Int, String)` let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Integer.into(), - PrimitiveTypeKind::String.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Integer.into(), + 
PrimitiveTypeKind::String.into(), + ]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`Int` or `Float`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`Int` or `Float`".into() + }); // Check for `(String, Float)` let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Float.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Float.into(), + ]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Int` or `Float`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Int` or `Float`".into() + }); // Check for `(Float, String)` let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::Float.into(), - PrimitiveTypeKind::String.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::Float.into(), + PrimitiveTypeKind::String.into(), + ]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`Int` or `Float`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`Int` or `Float`".into() + }); } #[test] @@ -2980,14 +2990,11 @@ mod test { assert_eq!(e, FunctionBindError::TooFewArguments(1)); let e = f - .bind( - &mut types, - &[ - PrimitiveTypeKind::String.into(), - PrimitiveTypeKind::Boolean.into(), - PrimitiveTypeKind::File.into(), - ], - ) + .bind(&mut types, &[ + PrimitiveTypeKind::String.into(), + PrimitiveTypeKind::Boolean.into(), + PrimitiveTypeKind::File.into(), + ]) .expect_err("bind should fail"); assert_eq!(e, FunctionBindError::TooManyArguments(2)); @@ -2995,13 +3002,10 @@ mod test { let e = f .bind(&mut types, &[PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 0, - expected: "`Array[X]` where `X`: any optional type".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 0, + expected: "`Array[X]` where `X`: any optional type".into() + }); // Check `Array[String?]+` let array = types.add_array(ArrayType::non_empty(PrimitiveType::optional( @@ -3022,13 +3026,10 @@ mod test { let e = f .bind(&mut types, &[array, PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`String`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`String`".into() + }); // Check `Array[String?]` let array = types.add_array(ArrayType::new(PrimitiveType::optional( @@ -3049,12 +3050,9 @@ mod test { let e = f .bind(&mut types, &[array, PrimitiveTypeKind::Integer.into()]) .expect_err("binding should fail"); - assert_eq!( - e, - FunctionBindError::ArgumentTypeMismatch { - index: 1, - expected: "`String`".into() - } - ); + assert_eq!(e, FunctionBindError::ArgumentTypeMismatch { + index: 1, + expected: "`String`".into() + }); } } diff --git a/wdl-format/Cargo.toml b/wdl-format/Cargo.toml index 03a435cdf..b26d7e086 100644 --- a/wdl-format/Cargo.toml +++ b/wdl-format/Cargo.toml @@ -8,8 +8,19 @@ homepage.workspace = true repository.workspace = true [dependencies] -wdl-ast = { path = "../wdl-ast", version = "0.7.1" } +wdl-ast = { path = "../wdl-ast", version = "0.7.1", features = ["codespan"] } 
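The `wdl-analysis/src/stdlib.rs` hunks above are purely mechanical: they reflow the standard library's function-registration table for a newer rustfmt style (the whole `functions.insert(..).is_none()` chain now sits inside the parentheses of `assert!(..)`) without changing any signature. For orientation, the idiom being reformatted looks like the following sketch, written as it would appear inside the `STDLIB` initializer; `example_len` is a hypothetical function name used only for illustration, and the builder calls are assumed from the surrounding diff rather than defined by this patch.

    // Hypothetical registration (illustration only; `example_len` is not a
    // real WDL stdlib function). Each function is inserted exactly once, and
    // `is_none()` on the returned previous value asserts that no duplicate
    // registration took place.
    assert!(
        functions
            .insert(
                "example_len",
                MonomorphicFunction::new(
                    SupportedVersion::V1(V1::Two),
                    FunctionSignature::builder()
                        .parameter(PrimitiveTypeKind::String)
                        .ret(PrimitiveTypeKind::Integer)
                        .build(),
                )
                .into(),
            )
            .is_none()
    );

`PolymorphicFunction::new` entries follow the same shape, taking a `vec![..]` of `FunctionSignature`s for overloaded functions.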
 nonempty.workspace = true

+[dev-dependencies]
+pretty_assertions = { workspace = true }
+approx = { workspace = true }
+rayon = { workspace = true }
+colored = { workspace = true }
+codespan-reporting = { workspace = true }
+
 [lints]
 workspace = true
+
+[[test]]
+name = "format"
+harness = false
diff --git a/wdl-format/tests/format.rs b/wdl-format/tests/format.rs
new file mode 100644
index 000000000..aeaecf553
--- /dev/null
+++ b/wdl-format/tests/format.rs
@@ -0,0 +1,211 @@
+//! The format file tests.
+//!
+//! This test looks for directories in `tests/format`.
+//!
+//! Each directory is expected to contain:
+//!
+//! * `source.wdl` - the test input source to parse.
+//! * `source.formatted.wdl` - the expected formatted output.
+//!
+//! The `source.formatted.wdl` file may be automatically generated or updated
+//! by setting the `BLESS` environment variable when running this test.
+
+use std::collections::HashSet;
+use std::env;
+use std::ffi::OsStr;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::exit;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term;
+use codespan_reporting::term::Config;
+use codespan_reporting::term::termcolor::Buffer;
+use colored::Colorize;
+use pretty_assertions::StrComparison;
+use rayon::prelude::*;
+use wdl_ast::Diagnostic;
+use wdl_ast::Document;
+use wdl_ast::Node;
+use wdl_format::Formatter;
+use wdl_format::element::node::AstNodeFormatExt;
+
+/// Finds the test directories to run, honoring any test names passed as
+/// filters on the command line.
+fn find_tests() -> Vec<PathBuf> {
+    // Check for filter arguments consisting of test names
+    let mut filter = HashSet::new();
+    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
+        if !arg.starts_with('-') {
+            filter.insert(arg);
+        }
+    }
+
+    let mut tests: Vec<PathBuf> = Vec::new();
+    for entry in Path::new("tests/format").read_dir().unwrap() {
+        let entry = entry.expect("failed to read directory");
+        let path = entry.path();
+        if !path.is_dir()
+            || (!filter.is_empty()
+                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
+        {
+            continue;
+        }
+
+        tests.push(path);
+    }
+
+    tests.sort();
+    tests
+}
+
+/// Renders a set of parse diagnostics as plain text for error reporting.
+fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
+    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
+    let mut buffer = Buffer::no_color();
+    for diagnostic in diagnostics {
+        term::emit(
+            &mut buffer,
+            &Config::default(),
+            &file,
+            &diagnostic.to_codespan(),
+        )
+        .expect("should emit");
+    }
+
+    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
+}
+
+/// Compares formatted output against the expected file, overwriting the
+/// expected file instead when `BLESS` is set.
+fn compare_result(path: &Path, result: &str) -> Result<(), String> {
+    if env::var_os("BLESS").is_some() {
+        fs::write(path, &result).map_err(|e| {
+            format!(
+                "failed to write result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?;
+        return Ok(());
+    }
+
+    let expected = fs::read_to_string(path)
+        .map_err(|e| {
+            format!(
+                "failed to read result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?
+        .replace("\r\n", "\n");
+
+    if expected != result {
+        return Err(format!(
+            "result is not as expected:\n{}",
+            StrComparison::new(&expected, &result),
+        ));
+    }
+
+    Ok(())
+}
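+
+// Usage note (assumed invocation, not part of the patched test file): because
+// Cargo.toml declares this test with `harness = false`, the `main` below
+// drives the run itself. Something like
+//
+//   BLESS=1 cargo test -p wdl-format --test format
+//
+// regenerates each test's `source.formatted.wdl` via `compare_result` above,
+// while omitting `BLESS` diffs the formatter's output against the checked-in
+// files. Per `find_tests`, directory names given after a literal `--`
+// argument to the test binary act as a filter selecting a subset of tests.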
+
+/// Runs a single format test directory, recording a pass in `ntests`.
+fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
+    let path = test.join("source.wdl");
+    let source = std::fs::read_to_string(&path).map_err(|e| {
+        format!(
+            "failed to read source file `{path}`: {e}",
+            path = path.display()
+        )
+    })?;
+
+    let (document, diagnostics) = Document::parse(&source);
+
+    if !diagnostics.is_empty() {
+        return Err(format!(
+            "failed to parse `{path}`: {e}",
+            path = path.display(),
+            e = format_diagnostics(&diagnostics, path.as_path(), &source)
+        ));
+    };
+
+    let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+    let formatter = Formatter::default();
+
+    let formatted = match formatter.format(&document) {
+        Ok(formatted) => formatted,
+        Err(e) => {
+            return Err(format!(
+                "failed to format `{path}`: {e}",
+                path = path.display(),
+                e = e
+            ));
+        }
+    };
+    compare_result(path.with_extension("formatted.wdl").as_path(), &formatted)?;
+
+    ntests.fetch_add(1, Ordering::SeqCst);
+    Ok(())
+}
+
+fn main() {
+    let tests = find_tests();
+    println!("\nrunning {} tests\n", tests.len());
+
+    let ntests = AtomicUsize::new(0);
+    let errors = tests
+        .par_iter()
+        .filter_map(|test| {
+            let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
+            match std::panic::catch_unwind(|| {
+                match run_test(test, &ntests)
+                    .map_err(|e| format!("failed to run test `{path}`: {e}", path = test.display()))
+                    .err()
+                {
+                    Some(e) => {
+                        println!("test {test_name} ... {failed}", failed = "failed".red());
+                        Some((test_name, e))
+                    }
+                    None => {
+                        println!("test {test_name} ... {ok}", ok = "ok".green());
+                        None
+                    }
+                }
+            }) {
+                Ok(result) => result,
+                Err(e) => {
+                    println!(
+                        "test {test_name} ... {panicked}",
+                        panicked = "panicked".red()
+                    );
+                    Some((
+                        test_name,
+                        format!(
+                            "test panicked: {e:?}",
+                            e = e
+                                .downcast_ref::<String>()
+                                .map(|s| s.as_str())
+                                .or_else(|| e.downcast_ref::<&str>().copied())
+                                .unwrap_or("no panic message")
+                        ),
+                    ))
+                }
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !errors.is_empty() {
+        eprintln!(
+            "\n{count} test(s) {failed}:",
+            count = errors.len(),
+            failed = "failed".red()
+        );
+
+        for (name, msg) in errors.iter() {
+            eprintln!("{name}: {msg}", msg = msg.red());
+        }
+
+        exit(1);
+    }
+
+    println!(
+        "\ntest result: ok. {} passed\n",
+        ntests.load(Ordering::SeqCst)
+    );
+}
diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
new file mode 100644
index 000000000..d9a98e06c
--- /dev/null
+++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
@@ -0,0 +1,25 @@
+'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl
+on the date 08-05-2024.
+It was accompanied by the following license: + +MIT License + +Copyright (c) 2017 ENCODE DCC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl new file mode 100644 index 000000000..92c09ea84 --- /dev/null +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl @@ -0,0 +1,3296 @@ +version 1.0 + +struct RuntimeEnvironment { + String docker + String singularity + String conda +} + +workflow chip { + String pipeline_ver = 'v2.2.2' + + meta { + version: 'v2.2.2' + + author: 'Jin wook Lee' + email: 'leepc12@gmail.com' + description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil.' + organization: 'ENCODE DCC' + + specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing' + + default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2' + default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json' + + parameter_group: { + runtime_environment: { + title: 'Runtime environment', + description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.' + }, + pipeline_metadata: { + title: 'Pipeline metadata', + description: 'Metadata for a pipeline (e.g. title and description).' + }, + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.', + help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.' 
+ }, + input_genomic_data: { + title: 'Input genomic data', + description: 'Genomic input files for experiment.', + help: 'Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].' + }, + input_genomic_data_control: { + title: 'Input genomic data (control)', + description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.', + help: 'Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.' + }, + pipeline_parameter: { + title: 'Pipeline parameter', + description: 'Pipeline type and flags to turn on/off analyses.', + help: 'Use chip.align_only to align FASTQs without peak calling.' + }, + alignment: { + title: 'Alignment', + description: 'Parameters for alignment.', + help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.' + }, + peak_calling: { + title: 'Peak calling', + description: 'Parameters for peak calling.', + help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.' + }, + resource_parameter: { + title: 'Resource parameter', + description: 'Number of CPUs (threads), max. memory and walltime for tasks.', + help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.' + } + } + } + input { + # group: runtime_environment + String docker = 'encodedcc/chip-seq-pipeline:v2.2.2' + String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + String conda = 'encd-chip' + String conda_macs2 = 'encd-chip-macs2' + String conda_spp = 'encd-chip-spp' + + # group: pipeline_metadata + String title = 'Untitled' + String description = 'No description' + + # group: reference_genome + File? genome_tsv + String? 
genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + + Boolean? ctl_paired_end + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = 'bowtie2' + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = 'auto' + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = 'picard' + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? 
cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? gc_bias_picard_java_heap + } + + parameter_meta { + docker: { + description: 'Default Docker image URI to run WDL tasks.', + group: 'runtime_environment', + example: 'ubuntu:20.04' + } + singularity: { + description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.', + group: 'runtime_environment', + example: 'docker://ubuntu:20.04' + } + conda: { + description: 'Default Conda environment name to run WDL tasks. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip' + } + conda_macs2: { + description: 'Conda environment name for task macs2. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip-macs2' + } + conda_spp: { + description: 'Conda environment name for tasks spp/xcor. For Conda users only.', + group: 'runtime_environment', + example: 'encd-chip-spp' + } + title: { + description: 'Experiment title.', + group: 'pipeline_metadata', + example: 'ENCSR936XTK (subsampled 1/50)' + } + description: { + description: 'Experiment description.', + group: 'pipeline_metadata', + example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)' + } + genome_tsv: { + description: 'Reference genome database TSV.', + group: 'reference_genome', + help: 'This TSV file includes all genome-specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameters in it. Parameters defined in the input JSON will override those defined in the genome TSV.', + example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv' + } + genome_name: { + description: 'Genome name.', + group: 'reference_genome' + } + ref_fa: { + description: 'Reference FASTA file.', + group: 'reference_genome' + } + bowtie2_idx_tar: { + description: 'Bowtie2 index TAR file.', + group: 'reference_genome' + } + custom_aligner_idx_tar: { + description: 'Index TAR file for a custom aligner.
To use a custom aligner, define "chip.custom_align_py" too.', + group: 'reference_genome' + } + chrsz: { + description: '2-col chromosome sizes file.', + group: 'reference_genome' + } + blacklist: { + description: 'Blacklist file in BED format.', + group: 'reference_genome', + help: 'Peaks will be filtered with this file.' + } + blacklist2: { + description: 'Secondary blacklist file in BED format.', + group: 'reference_genome', + help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with the merged blacklist.' + } + mito_chr_name: { + description: 'Mitochondrial chromosome name.', + group: 'reference_genome', + help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during BAM filtering in the "filter" task.' + } + regex_bfilt_peak_chr_name: { + description: 'Reg-ex for chromosomes to keep while filtering peaks.', + group: 'reference_genome', + help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in the .bfilt. peak file. This is done along with blacklist filtering of the peak file.' + } + gensz: { + description: 'Genome sizes. "hs" for human, "mm" for mouse or the sum of the 2nd column in the chromosome sizes file.', + group: 'reference_genome' + } + paired_end: { + description: 'Sequencing endedness.', + group: 'input_genomic_data', + help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.', + example: true + } + paired_ends: { + description: 'Sequencing endedness array (for mixed SE/PE datasets).', + group: 'input_genomic_data', + help: 'Whether each biological replicate is paired ended or not.' + } + fastqs_rep1_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 1.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type, fill in parameters for that type, and leave the others undefined. Especially for FASTQs, there is an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz' + ] + } + fastqs_rep1_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 1.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz' + ] + } + fastqs_rep2_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 2.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz' + ] + } + fastqs_rep2_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 2.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1).
These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz' + ] + } + fastqs_rep3_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 3.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep3_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 3.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep4_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 4.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep4_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 4.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep5_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 5.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep5_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 5.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' 
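+ # An illustrative input JSON sketch (not part of the original file; file names are
+ # hypothetical): two biological replicates using the per-replicate variables
+ # documented above, where rep1 is single-ended with two technical-replicate FASTQs
+ # to merge and rep2 is paired-ended:
+ # {
+ #   "chip.paired_ends": [false, true],
+ #   "chip.fastqs_rep1_R1": ["rep1_lane1.R1.fastq.gz", "rep1_lane2.R1.fastq.gz"],
+ #   "chip.fastqs_rep2_R1": ["rep2.R1.fastq.gz"],
+ #   "chip.fastqs_rep2_R2": ["rep2.R2.fastq.gz"]
+ # }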
+ } + fastqs_rep9_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep9_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' + } + bams: { + description: 'List of unfiltered/raw BAM files for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from BAM files. Unfiltered/raw BAM files generated by the aligner (e.g. bowtie2). One entry per biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].' + } + nodup_bams: { + description: 'List of filtered/deduped BAM files for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from filtered BAM files. Filtered/deduped BAM files. One entry per biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].' + } + tas: { + description: 'List of TAG-ALIGN files for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from TAG-ALIGN files. TAG-ALIGN is a 6-col BED format. It is a simplified version of BAM. One entry per biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].' + } + peaks: { + description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. One entry per biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and the number of replicates. If you have more than one replicate, then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If the chip.true_rep_only flag is on, then do not define any parameters related to pseudo replicates (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2).' + } + peaks_pr1: { + description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. Define if the chip.true_rep_only flag is off.' + } + peaks_pr2: { + description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. Define if the chip.true_rep_only flag is off.'
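+ # An illustrative input JSON sketch (not part of the original file; file names are
+ # hypothetical): starting the pipeline from PEAK files with two replicates and
+ # chip.true_rep_only off requires the full set of true/pseudo/pooled peaks
+ # described above:
+ # {
+ #   "chip.peaks": ["rep1.narrowPeak.gz", "rep2.narrowPeak.gz"],
+ #   "chip.peaks_pr1": ["rep1.pr1.narrowPeak.gz", "rep2.pr1.narrowPeak.gz"],
+ #   "chip.peaks_pr2": ["rep1.pr2.narrowPeak.gz", "rep2.pr2.narrowPeak.gz"],
+ #   "chip.peak_pooled": "pooled.narrowPeak.gz",
+ #   "chip.peak_ppr1": "pooled.pr1.narrowPeak.gz",
+ #   "chip.peak_ppr2": "pooled.pr2.narrowPeak.gz"
+ # }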
+ } + peak_pooled: { + description: 'NARROWPEAK file for pooled true replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.' + } + peak_ppr1: { + description: 'NARROWPEAK file for pooled pseudo replicate 1.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates and the chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.' + } + peak_ppr2: { + description: 'NARROWPEAK file for pooled pseudo replicate 2.', + group: 'input_genomic_data', + help: 'Define if you want to start the pipeline from PEAK files. Define if you have multiple biological replicates and the chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.' + } + + ctl_paired_end: { + description: 'Sequencing endedness for all controls.', + group: 'input_genomic_data_control', + help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.' + } + ctl_paired_ends: { + description: 'Sequencing endedness array for mixed SE/PE controls.', + group: 'input_genomic_data_control', + help: 'Whether each control replicate is paired ended or not.' + } + ctl_fastqs_rep1_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 1.', + group: 'input_genomic_data_control', + help: 'Define if you want to start the pipeline from FASTQ files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type, fill in parameters for that type, and leave the others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz' + ] + } + ctl_fastqs_rep1_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 1.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz' + ] + } + ctl_fastqs_rep2_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 2.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz' + ] + } + ctl_fastqs_rep2_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 2.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1).
These FASTQs are usually technical replicates to be merged.', + example: [ + 'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz' + ] + } + ctl_fastqs_rep3_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 3.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep3_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 3.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep4_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 4.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep4_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 4.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep5_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 5.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep5_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 5.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep6_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 6.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep6_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 6.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep7_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 7.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep7_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 7.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep8_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 8.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' 
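+ # An illustrative input JSON sketch (not part of the original file; file names are
+ # hypothetical): control FASTQs follow the same per-replicate pattern as the
+ # experiment FASTQs, with chip.ctl_paired_ends covering mixed SE/PE controls:
+ # {
+ #   "chip.ctl_paired_ends": [false, true],
+ #   "chip.ctl_fastqs_rep1_R1": ["ctl1.R1.fastq.gz"],
+ #   "chip.ctl_fastqs_rep2_R1": ["ctl2.R1.fastq.gz"],
+ #   "chip.ctl_fastqs_rep2_R2": ["ctl2.R2.fastq.gz"]
+ # }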
+ } + ctl_fastqs_rep8_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 8.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep9_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 9.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep9_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 9.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep10_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 10.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep10_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 10.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_bams: { + description: 'List of unfiltered/raw BAM files for each control replicate.', + group: 'input_genomic_data_control', + help: 'Define if you want to start the pipeline from BAM files. Unfiltered/raw BAM files generated by the aligner (e.g. bowtie2). One entry per control replicate. e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].' + } + ctl_nodup_bams: { + description: 'List of filtered/deduped BAM files for each control replicate.', + group: 'input_genomic_data_control', + help: 'Define if you want to start the pipeline from filtered BAM files. Filtered/deduped BAM files. One entry per control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].' + } + ctl_tas: { + description: 'List of TAG-ALIGN files for each control replicate.', + group: 'input_genomic_data_control', + help: 'Define if you want to start the pipeline from TAG-ALIGN files. TAG-ALIGN is a 6-col BED format. It is a simplified version of BAM. One entry per control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].' + } + + pipeline_type: { + description: 'Pipeline type. tf for TF ChIP-seq, histone for histone ChIP-seq or control for mapping controls only.', + group: 'pipeline_parameter', + help: 'The default peak caller is different for each type: spp for TF ChIP-seq and macs2 for histone ChIP-seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.', + choices: ['tf', 'histone', 'control'], + example: 'tf' + } + redact_nodup_bam: { + description: 'Redact filtered/nodup BAM.', + group: 'pipeline_parameter', + help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). The raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on the filtered BAM will be calculated before it is redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM.
If you start from a nodup BAM, then this flag will not be active.' + } + align_only: { + description: 'Align only mode.', + group: 'pipeline_parameter', + help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.' + } + true_rep_only: { + description: 'Disables all analyses related to pseudo-replicates.', + group: 'pipeline_parameter', + help: 'Pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).' + } + enable_count_signal_track: { + description: 'Enables generation of count signal tracks.', + group: 'pipeline_parameter' + } + enable_jsd: { + description: 'Enables Jensen-Shannon Distance (JSD) plot generation.', + group: 'pipeline_parameter' + } + enable_gc_bias: { + description: 'Enables GC bias calculation.', + group: 'pipeline_parameter' + } + + aligner: { + description: 'Aligner: bowtie2, bwa or custom.', + group: 'alignment', + help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.', + choices: ['bowtie2', 'bwa', 'custom'], + example: 'bowtie2' + } + custom_align_py: { + description: 'Python script for a custom aligner.', + group: 'alignment', + help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".' + } + use_bwa_mem_for_pe: { + description: 'For paired-end datasets with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.', + group: 'alignment', + help: 'Use it only for paired-end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise, keep using bwa aln.' + } + bwa_mem_read_len_limit: { + description: 'Read length limit for bwa mem (for PE FASTQs only).', + group: 'alignment', + help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.' + } + use_bowtie2_local_mode: { + description: 'Use bowtie2\'s local mode (soft-clipping).', + group: 'alignment', + help: 'This will add --local to the bowtie2 command line, replacing the default end-to-end mode.' + } + crop_length: { + description: 'Crop FASTQs\' reads longer than this length.', + group: 'alignment', + help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.' + } + crop_length_tol: { + description: 'Tolerance for cropping reads in FASTQs.', + group: 'alignment', + help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.' + } + trimmomatic_phred_score_format: { + description: 'Base encoding (format) for Phred score in FASTQs.', + group: 'alignment', + choices: ['auto', 'phred33', 'phred64'], + help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise, -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".' + } + xcor_trim_bp: { + description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.', + group: 'alignment', + help: 'This does not affect alignment of experimental/control replicates. The pipeline additionally aligns the R1 FASTQ by itself for cross-correlation analysis only; this parameter is used for that alignment.'
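+ # A worked example of the cropping rules above (values are illustrative): with
+ # chip.crop_length == 50 and chip.crop_length_tol == 2, reads longer than 50 bp are
+ # cropped to 50 bp and reads shorter than 50 - 2 == 48 bp are dropped; the default
+ # chip.crop_length == 0 disables cropping entirely.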
+ } + use_filt_pe_ta_for_xcor: { + description: 'Use filtered PE BAM for cross-correlation analysis.', + group: 'alignment', + help: 'If not defined, the pipeline uses the SE BAM generated from the trimmed read1 FASTQ for cross-correlation analysis.' + } + dup_marker: { + description: 'Marker for duplicate reads. picard or sambamba.', + group: 'alignment', + help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.', + choices: ['picard', 'sambamba'], + example: 'picard' + } + no_dup_removal: { + description: 'Disable removal of duplicate reads during filtering BAM.', + group: 'alignment', + help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.' + } + mapq_thresh: { + description: 'Threshold for low MAPQ reads removal.', + group: 'alignment', + help: 'Low MAPQ reads are filtered out while filtering BAM.' + } + filter_chrs: { + description: 'List of chromosomes to be filtered out while filtering BAM.', + group: 'alignment', + help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact words for chromosome names.' + } + subsample_reads: { + description: 'Subsample reads. Shuffle and subsample reads.', + group: 'alignment', + help: 'This affects all downstream analyses after filtering the experiment BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in the BAM exceeds this number. 0 means disabled.' + } + ctl_subsample_reads: { + description: 'Subsample control reads. Shuffle and subsample control reads.', + group: 'alignment', + help: 'This affects all downstream analyses after filtering the control BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in the BAM exceeds this number. 0 means disabled.' + } + xcor_subsample_reads: { + description: 'Subsample reads for cross-correlation analysis only.', + group: 'alignment', + help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.' + } + xcor_exclusion_range_min: { + description: 'Exclusion minimum for cross-correlation analysis.', + group: 'alignment', + help: 'For run_spp.R -s. Make sure that it is consistent with the default strand shift -s=-500:5:1500 in run_spp.R.' + } + xcor_exclusion_range_max: { + description: 'Exclusion maximum for cross-correlation analysis.', + group: 'alignment', + help: 'For run_spp.R -s. If not defined, default values of `max(read_len + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.' + } + pseudoreplication_random_seed: { + description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).', + group: 'alignment', + help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then the pipeline uses the input TAG-ALIGN file\'s size (in bytes) for the random_seed.' + } + ctl_depth_limit: { + description: 'Hard limit for chosen control\'s depth.', + group: 'peak_calling', + help: 'If the control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then it is subsampled.'
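+ # A worked example combining this hard limit with chip.exp_ctl_depth_ratio_limit
+ # (described next; numbers are illustrative): with the defaults
+ # ctl_depth_limit == 200000000 and exp_ctl_depth_ratio_limit == 5.0, a chosen
+ # control of 600M reads against a 100M-read experiment replicate exceeds
+ # 5.0 * 100M == 500M and is subsampled down to max(500M, 200M) == 500M reads.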
+ } + exp_ctl_depth_ratio_limit: { + description: 'Second limit for chosen control\'s depth.', + group: 'peak_calling', + help: 'If the control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than the experiment replicate\'s read depth multiplied by this factor, then it is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.' + } + fraglen: { + description: 'Fragment length for each biological replicate.', + group: 'peak_calling', + help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when the pipeline starts from FASTQs. If defined, the fragment length estimated by cross-correlation analysis is ignored.' + } + peak_caller: { + description: 'Peak caller.', + group: 'peak_calling', + help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-seq even though spp is the default for TF ChIP-seq (chip.pipeline_type == tf).', + example: 'spp' + } + always_use_pooled_ctl: { + description: 'Always choose a pooled control for each experiment replicate.', + group: 'peak_calling', + help: 'If turned on, ignores chip.ctl_depth_ratio.' + } + ctl_depth_ratio: { + description: 'Maximum depth ratio between control replicates.', + group: 'peak_calling', + help: 'If the ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.' + } + + cap_num_peak: { + description: 'Upper limit on the number of peaks.', + group: 'peak_calling', + help: 'It is 300000 and 500000 by default for spp and macs2, respectively.' + } + pval_thresh: { + description: 'p-value threshold for the MACS2 peak caller.', + group: 'peak_calling', + help: 'macs2 callpeak -p' + } + fdr_thresh: { + description: 'FDR threshold for the spp peak caller (phantompeakqualtools).', + group: 'peak_calling', + help: 'run_spp.R -fdr=' + } + idr_thresh: { + description: 'IDR threshold.', + group: 'peak_calling' + } + + align_cpu: { + description: 'Number of cores for task align.', + group: 'resource_parameter', + help: 'Task align merges/crops/maps FASTQs.' + } + align_bowtie2_mem_factor: { + description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of FASTQs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + align_bwa_mem_factor: { + description: 'Multiplication factor to determine memory required for task align with bwa as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of FASTQs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + align_time_hr: { + description: 'Walltime (h) required for task align.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + align_bowtie2_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of an instance on GCP/AWS.' + } + align_bwa_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of an instance on GCP/AWS.'
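+ # A worked example of the factor-based sizing above (numbers are illustrative):
+ # with chip.align_bowtie2_mem_factor == 0.15, an align task whose input FASTQs
+ # total 40 GB would request roughly 0.15 * 40 GB == 6 GB of memory; the disk
+ # factors are applied to the same input size in the same way.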
+ } + filter_cpu: { + description: 'Number of cores for task filter.', + group: 'resource_parameter', + help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.' + } + filter_mem_factor: { + description: 'Multiplication factor to determine memory required for task filter.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of BAMs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + filter_time_hr: { + description: 'Walltime (h) required for task filter.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + filter_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task filter.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of BAMs to determine required disk size of an instance on GCP/AWS.' + } + bam2ta_cpu: { + description: 'Number of cores for task bam2ta.', + group: 'resource_parameter', + help: 'Task bam2ta converts filtered/deduped BAM into TAG-ALIGN (6-col BED) format.' + } + bam2ta_mem_factor: { + description: 'Multiplication factor to determine memory required for task bam2ta.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + bam2ta_time_hr: { + description: 'Walltime (h) required for task bam2ta.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + bam2ta_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task bam2ta.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of an instance on GCP/AWS.' + } + spr_mem_factor: { + description: 'Multiplication factor to determine memory required for task spr.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + spr_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task spr.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of an instance on GCP/AWS.' + } + jsd_cpu: { + description: 'Number of cores for task jsd.', + group: 'resource_parameter', + help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.' + } + jsd_mem_factor: { + description: 'Multiplication factor to determine memory required for task jsd.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + jsd_time_hr: { + description: 'Walltime (h) required for task jsd.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + jsd_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task jsd.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of an instance on GCP/AWS.' + } + xcor_cpu: { + description: 'Number of cores for task xcor.', + group: 'resource_parameter', + help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
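+ # An illustrative note on walltime vs. sizing factors above: the *_time_hr values
+ # (e.g. chip.filter_time_hr == 24) are only passed to cluster engines such as
+ # SLURM/SGE; on GCP/AWS, instance selection is driven by the mem/disk factors alone.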
+ } + xcor_mem_factor: { + description: 'Multiplication factor to determine memory required for task xcor.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + xcor_time_hr: { + description: 'Walltime (h) required for task xcor.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + xcor_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task xcor.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of an instance on GCP/AWS.' + } + subsample_ctl_mem_factor: { + description: 'Multiplication factor to determine memory required for task subsample_ctl.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + subsample_ctl_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of an instance on GCP/AWS.' + } + call_peak_cpu: { + description: 'Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.', + group: 'resource_parameter', + help: 'Task call_peak calls peaks on TAG-ALIGNs using the SPP/MACS2 peak caller. MACS2 is single-threaded, so cpu will be fixed at 2 for MACS2.' + } + call_peak_spp_mem_factor: { + description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + call_peak_macs2_mem_factor: { + description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of an instance (GCP/AWS) or job (HPCs).' + } + call_peak_time_hr: { + description: 'Walltime (h) required for task call_peak.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + call_peak_spp_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of an instance on GCP/AWS.' + } + call_peak_macs2_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of an instance on GCP/AWS.' + } + macs2_signal_track_mem_factor: { + description: 'Multiplication factor to determine memory required for task macs2_signal_track.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of an instance (GCP/AWS) or job (HPCs).'
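+ # An illustrative note on the spp/macs2 split above: only the factor matching the
+ # chosen peak caller is applied (see call_peak_mem_factor_ further down), e.g. with
+ # chip.peak_caller == 'spp' the call_peak task is sized from
+ # call_peak_spp_mem_factor and call_peak_macs2_mem_factor is ignored; with macs2,
+ # call_peak is also pinned to 2 CPUs since MACS2 is single-threaded.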
+ } + macs2_signal_track_time_hr: { + description: 'Walltime (h) required for task macs2_signal_track.', + group: 'resource_parameter', + help: 'This is for HPCs only (e.g. SLURM, SGE, ...).' + } + macs2_signal_track_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.', + group: 'resource_parameter', + help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of an instance on GCP/AWS.' + } + align_trimmomatic_java_heap: { + description: 'Maximum Java heap (java -Xmx) in task align.', + group: 'resource_parameter', + help: 'Maximum memory for Trimmomatic. If not defined, 90% of the align task\'s memory will be used.' + } + filter_picard_java_heap: { + description: 'Maximum Java heap (java -Xmx) in task filter.', + group: 'resource_parameter', + help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of the filter task\'s memory will be used.' + } + gc_bias_picard_java_heap: { + description: 'Maximum Java heap (java -Xmx) in task gc_bias.', + group: 'resource_parameter', + help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of the gc_bias task\'s memory will be used.' + } + } + RuntimeEnvironment runtime_environment = { + 'docker': docker, 'singularity': singularity, 'conda': conda + } + RuntimeEnvironment runtime_environment_spp = { + 'docker': docker, 'singularity': singularity, 'conda': conda_spp + } + RuntimeEnvironment runtime_environment_macs2 = { + 'docker': docker, 'singularity': singularity, 'conda': conda_macs2 + } + + # read genome data and paths + if ( defined(genome_tsv) ) { + call read_genome_tsv { input: + genome_tsv = genome_tsv, + runtime_environment = runtime_environment + } + } + File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa]) + File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar + else read_genome_tsv.bwa_idx_tar + File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar]) + File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz]) + String gensz_ = select_first([gensz, read_genome_tsv.gensz]) + File? blacklist1_ = if defined(blacklist) then blacklist + else read_genome_tsv.blacklist + File? blacklist2_ = if defined(blacklist2) then blacklist2 + else read_genome_tsv.blacklist2 + # merge multiple blacklists + # two blacklists can have a different number of columns (3 vs 6) + # so we limit merged blacklist's columns to 3 + Array[File] blacklists = select_all([blacklist1_, blacklist2_]) + if ( length(blacklists) > 1 ) { + call pool_ta as pool_blacklist { input: + tas = blacklists, + col = 3, + runtime_environment = runtime_environment + } + } + File?
blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled + else if length(blacklists) > 0 then blacklists[0] + else blacklist2_ + String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name]) + String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name]) + String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)]) + + ### temp vars (do not define these) + String aligner_ = if defined(custom_align_py) then 'custom' else aligner + String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp']) + else select_first([peak_caller, 'macs2']) + String peak_type_ = if peak_caller_=='spp' then 'regionPeak' + else 'narrowPeak' + Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only + String idr_rank_ = if peak_caller_=='spp' then 'signal.value' + else if peak_caller_=='macs2' then 'p.value' + else 'p.value' + Int cap_num_peak_spp = 300000 + Int cap_num_peak_macs2 = 500000 + Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp]) + else select_first([cap_num_peak, cap_num_peak_macs2]) + Int mapq_thresh_ = mapq_thresh + Boolean enable_xcor_ = if pipeline_type=='control' then false else true + Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track + Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd + Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias + Boolean align_only_ = if pipeline_type=='control' then true else align_only + + Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor + else align_bwa_mem_factor + Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor + else align_bwa_disk_factor + Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor + else call_peak_macs2_mem_factor + Float call_peak_disk_factor_ = if peak_caller_ =='spp' then call_peak_spp_disk_factor + else call_peak_macs2_disk_factor + + # temporary 2-dim fastqs array [rep_id][merge_id] + Array[Array[File]] fastqs_R1 = + if length(fastqs_rep10_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, + fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1] + else if length(fastqs_rep9_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, + fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1] + else if length(fastqs_rep8_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, + fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1] + else if length(fastqs_rep7_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, + fastqs_rep6_R1, fastqs_rep7_R1] + else if length(fastqs_rep6_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, + fastqs_rep6_R1] + else if length(fastqs_rep5_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1] + else if length(fastqs_rep4_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1] + else if length(fastqs_rep3_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1] + else if length(fastqs_rep2_R1)>0 then + [fastqs_rep1_R1, fastqs_rep2_R1] + else if length(fastqs_rep1_R1)>0 then + [fastqs_rep1_R1] + else [] + # no 
need to do that for R2 (R1 array will be used to determine presence of fastq for each rep) + Array[Array[File]] fastqs_R2 = + [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2, + fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2] + + # temporary 2-dim ctl fastqs array [rep_id][merge_id] + Array[Array[File]] ctl_fastqs_R1 = + if length(ctl_fastqs_rep10_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1] + else if length(ctl_fastqs_rep9_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1] + else if length(ctl_fastqs_rep8_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1] + else if length(ctl_fastqs_rep7_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1] + else if length(ctl_fastqs_rep6_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1] + else if length(ctl_fastqs_rep5_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1] + else if length(ctl_fastqs_rep4_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1] + else if length(ctl_fastqs_rep3_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1] + else if length(ctl_fastqs_rep2_R1)>0 then + [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1] + else if length(ctl_fastqs_rep1_R1)>0 then + [ctl_fastqs_rep1_R1] + else [] + # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep) + Array[Array[File]] ctl_fastqs_R2 = + [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2, + ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2] + + # temporary variables to get number of replicates + # WDLic implementation of max(A,B,C,...) + Int num_rep_fastq = length(fastqs_R1) + Int num_rep_bam = if length(bams)<num_rep_fastq then num_rep_fastq + else length(bams) + Int num_rep_nodup_bam = if length(nodup_bams)<num_rep_bam then num_rep_bam + else length(nodup_bams) + Int num_rep_ta = if length(tas)<num_rep_nodup_bam then num_rep_nodup_bam + else length(tas) + Int num_rep_peak = if length(peaks)<num_rep_ta then num_rep_ta + else length(peaks) + Int num_rep = num_rep_peak + + Int num_ctl_fastq = length(ctl_fastqs_R1) + Int num_ctl_bam = if length(ctl_bams)<num_ctl_fastq then num_ctl_fastq + else length(ctl_bams) + Int num_ctl_nodup_bam = if length(ctl_nodup_bams)<num_ctl_bam then num_ctl_bam + else length(ctl_nodup_bams) + Int num_ctl_ta = if length(ctl_tas)<num_ctl_nodup_bam then num_ctl_nodup_bam + else length(ctl_tas) + Int num_ctl = num_ctl_ta + + if ( ( num_rep_fastq > 0 || num_ctl_fastq > 0 ) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) { + call raise_exception as error_wrong_aligner { input: + msg = 'Choose chip.aligner to align your fastqs.
Choices: bwa, bowtie2, custom.', + runtime_environment = runtime_environment + } + } + if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) { + call raise_exception as error_use_bwa_mem_for_non_bwa { input: + msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.', + runtime_environment = runtime_environment + } + } + if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) { + call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input: + msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.', + runtime_environment = runtime_environment + } + } + if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) { + call raise_exception as error_custom_aligner { input: + msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.', + runtime_environment = runtime_environment + } + } + + if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) { + call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input: + msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_ratio_limit">0) for ' + + 'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' + + 'Automatic control subsampling is enabled by default. ' + + 'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' + + 'You can still use manual control subsampling ("chip.ctl_subsample_reads">0) since it is done ' + + 'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ', + runtime_environment = runtime_environment + } + } + if ( pipeline_type == 'control' && num_ctl > 0 ) { + call raise_exception as error_ctl_input_defined_in_control_mode { input: + msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.', + runtime_environment = runtime_environment + } + } + if ( pipeline_type == 'control' && num_rep_fastq == 0 ) { + call raise_exception as error_ctl_fastq_input_required_for_control_mode { input: + msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY.
Pipeline will recognize them as control FASTQs.', + runtime_environment = runtime_environment + } + } + + # align each replicate + scatter(i in range(num_rep)) { + # to override endedness definition for individual replicate + # paired_end will override paired_ends[i] + Boolean paired_end_ = if !defined(paired_end) && i<length(paired_ends) then paired_ends[i] + else select_first([paired_end, false]) + Boolean has_input_of_align = i<length(fastqs_R1) && length(fastqs_R1[i])>0 + Boolean has_output_of_align = i<length(bams) + } + + # align each control + scatter(i in range(num_ctl)) { + Boolean has_output_of_align_ctl = i<length(ctl_bams) + } + + # if there are TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta = length(select_all(ta_))==num_rep + if ( has_all_inputs_of_pool_ta && num_rep>1 ) { + # pool tagaligns from true replicates + call pool_ta { input : + tas = ta_, + prefix = 'rep', + runtime_environment = runtime_environment + } + } + + # if there are pr1 TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep + if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) { + # pool tagaligns from pseudo replicate 1 + call pool_ta as pool_ta_pr1 { input : + tas = spr.ta_pr1, + prefix = 'rep-pr1', + runtime_environment = runtime_environment + } + } + + # if there are pr2 TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep + if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) { + # pool tagaligns from pseudo replicate 2 + call pool_ta as pool_ta_pr2 { input : + tas = spr.ta_pr2, + prefix = 'rep-pr2', + runtime_environment = runtime_environment + } + } + + # if there are CTL TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl + if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) { + # pool tagaligns from true replicates + call pool_ta as pool_ta_ctl { input : + tas = ctl_ta_, + prefix = 'ctl', + runtime_environment = runtime_environment + } + } + + Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled) + if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) { + call count_signal_track as count_signal_track_pooled { input : + ta = pool_ta.ta_pooled, + chrsz = chrsz_, + runtime_environment = runtime_environment + } + } + + Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep + if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) { + # fingerprint and JS-distance plot + call jsd { input : + nodup_bams = nodup_bam_, + ctl_bams = ctl_nodup_bam_, # use first control only + blacklist = blacklist_, + mapq_thresh = mapq_thresh_, + + cpu = jsd_cpu, + mem_factor = jsd_mem_factor, + time_hr = jsd_time_hr, + disk_factor = jsd_disk_factor, + runtime_environment = runtime_environment + } + } + + Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep + && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0 + if ( has_all_input_of_choose_ctl && !align_only_ ) { + # choose appropriate control for each exp IP replicate + # outputs: + # choose_ctl.idx : control replicate index for each exp replicate + # -1 means pooled ctl replicate + call choose_ctl { input: + tas = ta_, + ctl_tas = ctl_ta_, + ta_pooled = pool_ta.ta_pooled, + ctl_ta_pooled = pool_ta_ctl.ta_pooled, + always_use_pooled_ctl = always_use_pooled_ctl, + ctl_depth_ratio = ctl_depth_ratio, + ctl_depth_limit = ctl_depth_limit, + exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit, + runtime_environment = runtime_environment + } + } + + scatter(i in range(num_rep)) { + # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]] + # chosen_ctl_ta_id + # >=0: control TA index (this means that control TA with this index exists) + # -1: use pooled control + # -2: there is no control + Int
chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2 + Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0 + Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false + else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] + else ctl_paired_end_[chosen_ctl_ta_id] + + if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) { + call subsample_ctl { input: + ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled + else ctl_ta_[ chosen_ctl_ta_id ], + subsample = chosen_ctl_ta_subsample, + paired_end = chosen_ctl_paired_end, + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment + } + } + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] + else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ] + else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ] + else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ] + } + Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0 + + # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) + Array[Int] fraglen_tmp = select_all(fraglen_) + + # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks + scatter(i in range(num_rep)) { + Boolean has_input_of_call_peak = defined(ta_[i]) + Boolean has_output_of_call_peak = i<length(peaks) + } + + # if ( num_rep > 1 ) { + # rounded mean of fragment length, which will be used for + # 1) calling peaks for pooled true/pseudo replicates + # 2) calculating FRiP + call rounded_mean as fraglen_mean { input : + ints = fraglen_tmp, + runtime_environment = runtime_environment + } + # } + + if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) { + call subsample_ctl as subsample_ctl_pooled { input: + ta = if num_ctl < 2 then ctl_ta_[0] + else pool_ta_ctl.ta_pooled, + subsample = chosen_ctl_ta_pooled_subsample, + paired_end = ctl_paired_end_[0], + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment + } + } + # actually not an array + Array[File?]
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] + else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ] + else if num_ctl < 2 then [ ctl_ta_[0] ] + else [ pool_ta_ctl.ta_pooled ] + + Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) + Boolean has_output_of_call_peak_pooled = defined(peak_pooled) + if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + # call peaks on pooled replicate + # always call peaks for pooled replicate to get signal tracks + call call_peak as call_peak_pooled { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled + else call_peak_pooled.peak + + # macs2 signal track for pooled rep + if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + call macs2_signal_track as macs2_signal_track_pooled { input : + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_mean.rounded_mean, + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2 + } + } + + Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) + Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) + if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) { + # call peaks on 1st pooled pseudo replicates + call call_peak as call_peak_ppr1 { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? 
peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 + else call_peak_ppr1.peak + + Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) + Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) + if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) { + # call peaks on 2nd pooled pseudo replicates + call call_peak as call_peak_ppr2 { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 + else call_peak_ppr2.peak + + # do IDR/overlap on all pairs of two replicates (i,j) + # where i and j are zero-based indices and 0 <= i < j < num_rep + scatter( pair in cross(range(num_rep),range(num_rep)) ) { + # pair.left = 0-based index of 1st replicate + # pair.right = 0-based index of 2nd replicate + File? peak1_ = peak_[pair.left] + File? peak2_ = peak_[pair.right] + if ( !align_only_ && pair.left 1 ) { + # Naive overlap on pooled pseudo replicates + call overlap as overlap_ppr { input : + prefix = 'pooled-pr1_vs_pooled-pr2', + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment + } + } + + if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) { + # IDR on pooled pseduo replicates + call idr as idr_ppr { input : + prefix = 'pooled-pr1_vs_pooled-pr2', + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + idr_thresh = idr_thresh, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + rank = idr_rank_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment + } + } + + # reproducibility QC for overlap/IDR peaks + if ( !align_only_ && !true_rep_only && num_rep > 0 ) { + # reproducibility QC for overlapping peaks + call reproducibility as reproducibility_overlap { input : + prefix = 'overlap', + peaks = select_all(overlap.bfilt_overlap_peak), + peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [], + peak_ppr = overlap_ppr.bfilt_overlap_peak, + peak_type = peak_type_, + chrsz = chrsz_, + runtime_environment = runtime_environment + } + } + + if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) { + # reproducibility QC for IDR peaks + call reproducibility as reproducibility_idr { input : + prefix = 'idr', + peaks = select_all(idr.bfilt_idr_peak), + peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [], + peak_ppr = idr_ppr.bfilt_idr_peak, + peak_type = peak_type_, + chrsz = chrsz_, + 
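+ # Editorial, illustrative note (not upstream code): outputs of calls made
+ # inside `if` blocks are optional in WDL 1.0, which is why inputs such as
+ # `peaks_pr` above use the `if defined(x) then select_first([x]) else []`
+ # idiom — unwrap the optional array, or fall back to an empty one when the
+ # conditional branch never ran. A minimal sketch of the same idiom, assuming
+ # a hypothetical conditional call `t` with an `Array[File] fs` output:
+ # Array[File] fs_ = if defined(t.fs) then select_first([t.fs]) else []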
runtime_environment = runtime_environment + } + } + + # Generate final QC report and JSON + call qc_report { input : + pipeline_ver = pipeline_ver, + title = title, + description = description, + genome = genome_name_, + paired_ends = paired_end_, + ctl_paired_ends = ctl_paired_end_, + pipeline_type = pipeline_type, + aligner = aligner_, + no_dup_removal = no_dup_removal, + peak_caller = peak_caller_, + cap_num_peak = cap_num_peak_, + idr_thresh = idr_thresh, + pval_thresh = pval_thresh, + xcor_trim_bp = xcor_trim_bp, + xcor_subsample_reads = xcor_subsample_reads, + + samstat_qcs = select_all(align.samstat_qc), + nodup_samstat_qcs = select_all(filter.samstat_qc), + dup_qcs = select_all(filter.dup_qc), + lib_complexity_qcs = select_all(filter.lib_complexity_qc), + xcor_plots = select_all(xcor.plot_png), + xcor_scores = select_all(xcor.score), + + ctl_samstat_qcs = select_all(align_ctl.samstat_qc), + ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc), + ctl_dup_qcs = select_all(filter_ctl.dup_qc), + ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), + + jsd_plot = jsd.plot, + jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [], + + frip_qcs = select_all(call_peak.frip_qc), + frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), + frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc), + frip_qc_pooled = call_peak_pooled.frip_qc, + frip_qc_ppr1 = call_peak_ppr1.frip_qc, + frip_qc_ppr2 = call_peak_ppr2.frip_qc, + + idr_plots = select_all(idr.idr_plot), + idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [], + idr_plot_ppr = idr_ppr.idr_plot, + frip_idr_qcs = select_all(idr.frip_qc), + frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [], + frip_idr_qc_ppr = idr_ppr.frip_qc, + frip_overlap_qcs = select_all(overlap.frip_qc), + frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [], + frip_overlap_qc_ppr = overlap_ppr.frip_qc, + idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, + overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, + + gc_plots = select_all(gc_bias.gc_plot), + + peak_region_size_qcs = select_all(call_peak.peak_region_size_qc), + peak_region_size_plots = select_all(call_peak.peak_region_size_plot), + num_peak_qcs = select_all(call_peak.num_peak_qc), + + idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc, + idr_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc, + + overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc, + overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc, + + runtime_environment = runtime_environment + } + + output { + File report = qc_report.report + File qc_json = qc_report.qc_json + Boolean qc_json_ref_match = qc_report.qc_json_ref_match + } +} + +task align { + input { + Array[File] fastqs_R1 # [merge_id] + Array[File] fastqs_R2 + File? ref_fa + Int? trim_bp # this is for R1 only + Int crop_length + Int crop_length_tol + String? trimmomatic_phred_score_format + + String aligner + + String mito_chr_name + Int? multimapping + File? custom_align_py + File? idx_tar # reference index tar + Boolean paired_end + Boolean use_bwa_mem_for_pe + Int bwa_mem_read_len_limit + Boolean use_bowtie2_local_mode + + String? 
trimmomatic_java_heap + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") + Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) + + Float trimmomatic_java_heap_factor = 0.9 + Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2]) + else transpose([fastqs_R1]) + command { + set -e + + # check if pipeline dependencies can be found + if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] + then + echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 + exit 3 + fi + python3 $(which encode_task_merge_fastq.py) \ + ${write_tsv(tmp_fastqs)} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--nth ' + cpu} + + if [ -z '${trim_bp}' ]; then + SUFFIX= + else + SUFFIX=_trimmed + python3 $(which encode_task_trim_fastq.py) \ + R1/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R1$SUFFIX + if [ '${paired_end}' == 'true' ]; then + python3 $(which encode_task_trim_fastq.py) \ + R2/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R2$SUFFIX + fi + fi + if [ '${crop_length}' == '0' ]; then + SUFFIX=$SUFFIX + else + NEW_SUFFIX="$SUFFIX"_cropped + python3 $(which encode_task_trimmomatic.py) \ + --fastq1 R1$SUFFIX/*.fastq.gz \ + ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + --crop-length ${crop_length} \ + --crop-length-tol "${crop_length_tol}" \ + ${'--phred-score-format ' + trimmomatic_phred_score_format } \ + --out-dir-R1 R1$NEW_SUFFIX \ + ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \ + ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \ + ${'--nth ' + cpu} + SUFFIX=$NEW_SUFFIX + fi + + if [ '${aligner}' == 'bwa' ]; then + python3 $(which encode_task_bwa.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \ + ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + + elif [ '${aligner}' == 'bowtie2' ]; then + python3 $(which encode_task_bowtie2.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${'--multimapping ' + multimapping} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bowtie2_local_mode then '--local' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + else + python3 ${custom_align_py} \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_align.py) \ + R1$SUFFIX/*.fastq.gz $(ls *.bam) \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + rm -rf R1 R2 R1$SUFFIX R2$SUFFIX + } + output { + File bam = glob('*.bam')[0] + File bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File read_len_log = glob('*.read_length.txt')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} 
SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_filter.py) \ + ${bam} \ + ${if paired_end then '--paired-end' else ''} \ + --multimapping 0 \ + ${'--dup-marker ' + dup_marker} \ + ${'--mapq-thresh ' + mapq_thresh} \ + --filter-chrs ${sep=' ' filter_chrs} \ + ${'--chrsz ' + chrsz} \ + ${if no_dup_removal then '--no-dup-removal' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + + if [ '${redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ${'--ref-fa ' + ref_fa} \ + '--delete-original-bam' + fi + } + output { + File nodup_bam = glob('*.bam')[0] + File nodup_bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File dup_qc = glob('*.dup.qc')[0] + File lib_complexity_qc = glob('*.lib_complexity.qc')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task bam2ta { + input { + File? bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_bam2ta.py) \ + ${bam} \ + --disable-tn5-shift \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + } + output { + File ta = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task spr { + input { + File? 
ta + Boolean paired_end + Int pseudoreplication_random_seed + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_spr.py) \ + ${ta} \ + ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \ + ${if paired_end then '--paired-end' else ''} + } + output { + File ta_pr1 = glob('*.pr1.tagAlign.gz')[0] + File ta_pr2 = glob('*.pr2.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task pool_ta { + input { + Array[File?] tas + Int? col # number of columns in pooled TA + String? prefix # basename prefix + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_pool_ta.py) \ + ${sep=' ' select_all(tas)} \ + ${'--prefix ' + prefix} \ + ${'--col ' + col} + } + output { + File ta_pooled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '8 GB' + time : 4 + disks : 'local-disk 100 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task xcor { + input { + File? ta + Boolean paired_end + String mito_chr_name + Int subsample # number of reads to subsample TAGALIGN + # this will be used for xcor only + # will not affect any downstream analysis + String? chip_seq_type + Int? exclusion_range_min + Int? exclusion_range_max + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 8.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_xcor.py) \ + ${ta} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--chip-seq-type ' + chip_seq_type} \ + ${'--exclusion-range-min ' + exclusion_range_min} \ + ${'--exclusion-range-max ' + exclusion_range_max} \ + ${'--subsample ' + subsample} \ + ${'--nth ' + cpu} + } + output { + File plot_pdf = glob('*.cc.plot.pdf')[0] + File plot_png = glob('*.cc.plot.png')[0] + File score = glob('*.cc.qc')[0] + File fraglen_log = glob('*.cc.fraglen.txt')[0] + Int fraglen = read_int(fraglen_log) + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task jsd { + input { + Array[File?] nodup_bams + Array[File?] ctl_bams + File? 
blacklist + Int mapq_thresh + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_jsd.py) \ + ${sep=' ' select_all(nodup_bams)} \ + ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ + ${'--mapq-thresh '+ mapq_thresh} \ + ${'--blacklist '+ blacklist} \ + ${'--nth ' + cpu} + } + output { + File plot = glob('*.png')[0] + Array[File] jsd_qcs = glob('*.jsd.qc') + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. + Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ${sep=' ' select_all(tas)} \ + --ctl-tas ${sep=' ' select_all(ctl_tas)} \ + ${'--ta-pooled ' + ta_pooled} \ + ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ + ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ + ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ + ${'--ctl-depth-limit ' + ctl_depth_limit} \ + ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} + } + output { + File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] + File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] + File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + Float mem_gb = 8.0 + + command { + set -e + python3 $(which encode_task_count_signal_track.py) \ + ${ta} \ + ${'--chrsz ' + chrsz} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pos_bw = glob('*.positive.bigwig')[0] + File neg_bw = glob('*.negative.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? 
ta + Boolean paired_end + Int subsample + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + python3 $(which encode_task_subsample_ctl.py) \ + ${ta} \ + ${'--subsample ' + subsample} \ + ${if paired_end then '--paired-end' else ''} \ + } + output { + File ta_subsampled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + + File? blacklist # blacklist BED to filter raw peaks + String? regex_bfilt_peak_chr_name + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + + if [ '${peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + + elif [ '${peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ${sep=' ' select_all(tas)} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--fdr-thresh '+ fdr_thresh} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ${'--ta ' + tas[0]} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--peak-type ' + peak_type} \ + ${'--blacklist ' + blacklist} + } + output { + File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + # generated by post_call_peak py + File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = glob('*.frip.qc')[0] + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : if peak_caller == 'macs2' then 2 else cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. 
control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pval_bw = glob('*.pval.signal.bigwig')[0] + File fc_bw = glob('*.fc.signal.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_idr.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--idr-thresh ' + idr_thresh} \ + ${'--peak-type ' + peak_type} \ + --idr-rank ${rank} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File idr_plot = glob('*.txt.png')[0] + File idr_unthresholded_peak = glob('*.txt.gz')[0] + File idr_log = glob('*.idr*.log')[0] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? 
fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_overlap.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + --nonamecheck \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_reproducibility.py) \ + ${sep=' ' peaks} \ + --peaks-pr ${sep=' ' peaks_pr} \ + ${'--peak-ppr '+ peak_ppr} \ + --prefix ${prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--chrsz ' + chrsz} + } + output { + File optimal_peak = glob('*optimal_peak.*.gz')[0] + File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] + File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] + File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] + File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] + File conservative_peak = glob('*conservative_peak.*.gz')[0] + File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] + File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] + File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] + File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] + File reproducibility_qc = glob('*reproducibility.qc')[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + + String? 
picard_java_heap + + RuntimeEnvironment runtime_environment + } + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command { + set -e + python3 $(which encode_task_gc_bias.py) \ + ${'--nodup-bam ' + nodup_bam} \ + ${'--ref-fa ' + ref_fa} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + } + output { + File gc_plot = glob('*.gc_plot.png')[0] + File gc_log = glob('*.gc.txt')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 6 + disks : 'local-disk 250 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + + Array[File] gc_plots + + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + + File? 
qc_json_ref + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ${'--pipeline-ver ' + pipeline_ver} \ + ${"--title '" + sub(title,"'","_") + "'"} \ + ${"--desc '" + sub(description,"'","_") + "'"} \ + ${'--genome ' + genome} \ + ${'--multimapping ' + 0} \ + --paired-ends ${sep=' ' paired_ends} \ + --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ + --pipeline-type ${pipeline_type} \ + --aligner ${aligner} \ + ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ + --peak-caller ${peak_caller} \ + ${'--cap-num-peak ' + cap_num_peak} \ + --idr-thresh ${idr_thresh} \ + --pval-thresh ${pval_thresh} \ + --xcor-trim-bp ${xcor_trim_bp} \ + --xcor-subsample-reads ${xcor_subsample_reads} \ + --samstat-qcs ${sep='_:_' samstat_qcs} \ + --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ + --dup-qcs ${sep='_:_' dup_qcs} \ + --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ + --xcor-plots ${sep='_:_' xcor_plots} \ + --xcor-scores ${sep='_:_' xcor_scores} \ + --idr-plots ${sep='_:_' idr_plots} \ + --idr-plots-pr ${sep='_:_' idr_plots_pr} \ + --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ + ${'--jsd-plot ' + jsd_plot} \ + --jsd-qcs ${sep='_:_' jsd_qcs} \ + ${'--idr-plot-ppr ' + idr_plot_ppr} \ + --frip-qcs ${sep='_:_' frip_qcs} \ + --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ + --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ + ${'--frip-qc-pooled ' + frip_qc_pooled} \ + ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ + ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ + --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ + --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ + ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ + --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ + --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ + ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ + ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ + ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ + --gc-plots ${sep='_:_' gc_plots} \ + --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ + --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ + --num-peak-qcs ${sep='_:_' num_peak_qcs} \ + ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ + ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ + ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ + ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ + ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ + ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ + --out-qc-html qc.html \ + --out-qc-json qc.json \ + ${'--qc-json-ref ' + qc_json_ref} + } + output { + File report = glob('*qc.html')[0] + File qc_json = glob('*qc.json')[0] + Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +### workflow system tasks +task read_genome_tsv { + input { + File? genome_tsv + String? 
null_s + + RuntimeEnvironment runtime_environment + } + command <<< + echo "$(basename ~{genome_tsv})" > genome_name + # create empty files for all entries + touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 + touch mito_chr_name + touch regex_bfilt_peak_chr_name + + python <>> + output { + String? genome_name = read_string('genome_name') + String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa') + String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar') + String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar') + String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz') + String? gensz = if size('gensz')==0 then null_s else read_string('gensz') + String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist') + String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2') + String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name') + String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+' + else read_string('regex_bfilt_peak_chr_name') + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task rounded_mean { + input { + Array[Int] ints + + RuntimeEnvironment runtime_environment + } + command <<< + python <>> + output { + Int rounded_mean = read_int('tmp.txt') + } + runtime { + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task raise_exception { + input { + String msg + + RuntimeEnvironment runtime_environment + } + command { + echo -e "\n* Error: ${msg}\n" >&2 + exit 2 + } + output { + String error_msg = '${msg}' + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} \ No newline at end of file diff --git a/wdl-format/tests/format/clays_complex_script/source.wdl b/wdl-format/tests/format/clays_complex_script/source.wdl new file mode 100644 index 000000000..81faa4fa6 --- /dev/null +++ b/wdl-format/tests/format/clays_complex_script/source.wdl @@ -0,0 +1,165 @@ +## # Header +# regular comment will be left as is +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput + +## part of preamble +version 1.2 + +#@ except: MissingMetas +struct AStruct { + String member +} + +task a_task { + meta + # Here is a comment between `meta` and the parenthesis. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. 
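+ # Illustrative editorial note: `parameter_meta` accepts the same metadata
+ # literals as `meta` — strings, booleans, integers, floats, arrays, objects,
+ # and `null` — as the entries below exercise.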
+ an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input + # Here is a comment before the input. + { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be highlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + command <<< >>> + + output + # Here is a comment before the output. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be highlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct {} + } + + requirements + # This is a comment before the requirements. + { + container: "ubuntu:latest" + } + + hints { + max_cpu: 1 + } +} + +## These double-pound-sign comments +## should be converted to single-pound-sign comments. +workflow hello { + meta + # Here is a comment between `meta` and the parenthesis. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment. + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be highlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + call a_task { + } + + scatter (name in name_array) { + call say_task { greeting = greeting } + } + + if (some_condition_task) { + call a_task as task_two {} + } + + output + # Here is a comment before the output. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be highlighted, as it's not known within + # the TextMate language that it's a custom struct.
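+ # Illustrative editorial note (a sketch, assuming WDL 1.2 struct-literal
+ # syntax): a struct literal may also populate its members explicitly, e.g.
+ # AStruct { member: "hello" }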
+ AStruct some_other_struct = AStruct {} + } +} \ No newline at end of file diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.wdl new file mode 100644 index 000000000..7e3333f0a --- /dev/null +++ b/wdl-format/tests/format/complex_meta_and_calls/source.wdl @@ -0,0 +1,106 @@ +version +1.0 +workflow +test_wf +{ +input +{ +SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { +noncanonical_motifs: 30, +GT_AG_and_CT_AC_motif: 12, +} +} +parameter_meta +{ +out_sj_filter_overhang_min: { +type: "SpliceJunctionMotifs", +label: "Minimum overhang required to support a splicing junction" +} +} +output +{ +SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min +String a = "friend" +Int b = 1 + 2 +String c = "Hello, ~{a}" +Map[String, Int] d = { "a": 0, "b": 1, "c": 2} +} +meta { +a: "hello" +b: 'world' +c: 5 +d: -0xf +e: 1.0e10 +f: -2. +g: true +h: false +i: null +j: { +a: [1, 2, 3], +b: ["hello", "world", "!"], +c: { +x: 1, +y: 2, +z: 3 +} +} +k: [ +{ +a: {}, +b: 0, +c: "", +d: '', +e: [], +}, +{ +x: [1.0, 2.0, 3.0] +} +] +} +call no_params +call with_params { input: a, b, c, d = 1 } +call qualified.name +call qualified.name { input: a = 1, b = 2, c = "3" } +call aliased as x +call aliased as x { input: } +call f after x after y +call f after x after y { input: a = [] } +call f as x after x +call f as x after x after y { input: name = "hello" } +call test_task as foo { +input: bowchicka = "wowwow" +} +if ( +true +) { + +call test_task after foo { +input: bowchicka = "bowchicka" +} +scatter (i in range(3)) { +call test_task as bar { +input: bowchicka = i * 42 +} +} +} + +} +task +test_task +{ +command <<<>>> +input { +String bowchicka +} +parameter_meta { +bowchicka: { +type: "String", +label: "Bowchicka" +} +} +} + +struct SpliceJunctionMotifs { +Int noncanonical_motifs +Int GT_AG_and_CT_AC_motif +} diff --git a/wdl-format/tests/format/imports_with_both_comments/source.wdl b/wdl-format/tests/format/imports_with_both_comments/source.wdl new file mode 100644 index 000000000..1c32809f6 --- /dev/null +++ b/wdl-format/tests/format/imports_with_both_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# fileA 1.1 +import # fileA 1.2 +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" # fileA 2.3 +# fileA 3.1 +as # fileA 3.2 +# fileA 4.1 +bar # fileA 4.2 +# fileA 5.1 +alias # fileA 5.2 +# fileA 6.1 +qux # fileA 6.2 +# fileA 7.1 +as # fileA 7.2 +# fileA 8.1 +Qux # fileA 8.2 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.wdl new file mode 100644 index 000000000..f633e72d8 --- /dev/null +++ b/wdl-format/tests/format/imports_with_inline_comments/source.wdl @@ -0,0 +1,12 @@ +version 1.0 +import "fileB.wdl" as foo # fileB +workflow test {} +import "fileC.wdl" # fileC +import # fileA 1 +"fileA.wdl" # fileA 2 +as # fileA 3 +bar # fileA 4 +alias # fileA 5 +qux # fileA 6 +as # fileA 7 +Qux # fileA 8 diff --git a/wdl-format/tests/format/imports_with_no_comments/source.wdl b/wdl-format/tests/format/imports_with_no_comments/source.wdl new file mode 100644 index 000000000..e69a1a727 --- /dev/null +++ b/wdl-format/tests/format/imports_with_no_comments/source.wdl @@ -0,0 +1,7 @@ + version 1.1 + + import "fileB.wdl" as foo + import "fileA.wdl" as bar alias cows as horses + alias cats as dogs + 
workflow test {} + import "fileC.wdl" alias qux as Qux diff --git a/wdl-format/tests/format/imports_with_preceding_comments/source.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.wdl new file mode 100644 index 000000000..a27e7a4fc --- /dev/null +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" +# this comment belongs to fileB +import "fileB.wdl" as foo +# fileA 1 +import +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" +# fileA 3 +as +# fileA 4 +bar +# fileA 5 +alias +# fileA 6 +qux +# fileA 7 +as +# fileA 8 +Qux diff --git a/wdl-format/tests/format/interrupt_example/source.wdl b/wdl-format/tests/format/interrupt_example/source.wdl new file mode 100644 index 000000000..30e667287 --- /dev/null +++ b/wdl-format/tests/format/interrupt_example/source.wdl @@ -0,0 +1,10 @@ +version # interrupt +1.2 # how far should '1.2' be indented? + +workflow +# interrupt +test # should this be indented? +# interrupt +{ meta # interrupt +{ # how far should this bracket be indented? +}} \ No newline at end of file diff --git a/wdl-format/tests/format/seaseq-case/LICENSE.txt b/wdl-format/tests/format/seaseq-case/LICENSE.txt new file mode 100644 index 000000000..335221306 --- /dev/null +++ b/wdl-format/tests/format/seaseq-case/LICENSE.txt @@ -0,0 +1,205 @@ +'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl +on the date 08-05-2024. +It was accompanied by the following license: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/wdl-format/tests/format/seaseq-case/source.wdl b/wdl-format/tests/format/seaseq-case/source.wdl new file mode 100644 index 000000000..94c76656e --- /dev/null +++ b/wdl-format/tests/format/seaseq-case/source.wdl @@ -0,0 +1,898 @@ +version 1.0 +import "workflows/tasks/fastqc.wdl" +import "workflows/tasks/bedtools.wdl" +import "workflows/tasks/bowtie.wdl" +import "workflows/tasks/samtools.wdl" +import "workflows/tasks/macs.wdl" +import "workflows/workflows/bamtogff.wdl" +import "workflows/tasks/sicer.wdl" +import "workflows/workflows/motifs.wdl" +import "workflows/tasks/rose.wdl" +import "workflows/tasks/seaseq_util.wdl" as util +import "workflows/workflows/visualization.wdl" as viz +import "workflows/workflows/mapping.wdl" +import "workflows/tasks/runspp.wdl" +import "workflows/tasks/sortbed.wdl" +import "workflows/tasks/sratoolkit.wdl" as sra + +workflow seaseq { + String pipeline_ver = 'v2.0.0' + + meta { + title: 'SEAseq Analysis' + summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline' + description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.' + version: '2.0.0' + details: { + citation: 'https://doi.org/10.1186/s12859-022-04588-z', + contactEmail: 'modupeore.adetunji@stjude.org', + contactOrg: "St Jude Children's Research Hospital", + contactUrl: "", + upstreamLicenses: "MIT", + upstreamUrl: 'https://github.com/stjude/seaseq', + whatsNew: [ + { + version: "2.0", + changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"] + } + ] + } + parameter_group: { + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .', + help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.' + }, + input_genomic_data: { + title: 'Input FASTQ data', + description: 'Genomic input files for experiment.', + help: 'Input one or more sample data and/or SRA identifiers.' + }, + analysis_parameter: { + title: 'Analysis parameter', + description: 'Analysis settings needed for experiment.', + help: 'Analysis settings; such output analysis file name.' + } + } + } + input { + # group: reference_genome + File reference + File? spikein_reference + File? blacklist + File gtf + Array[File]? bowtie_index + Array[File]? spikein_bowtie_index + Array[File]? motif_databases + + # group: input_genomic_data + Array[String]? sample_sraid + Array[File]? sample_fastq + + # group: analysis_parameter + String? 
results_name + Boolean run_motifs=true + + } + + parameter_meta { + reference: { + description: 'Reference FASTA file', + group: 'reference_genome', + patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] + } + blacklist: { + description: 'Blacklist file in BED format', + group: 'reference_genome', + help: 'If defined, blacklist regions listed are excluded after reference alignment.', + patterns: ["*.bed", "*.bed.gz"] + } + gtf: { + description: 'gene annotation file (.gtf)', + group: 'reference_genome', + help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', + patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] + } + bowtie_index: { + description: 'bowtie v1 index files (*.ebwt)', + group: 'reference_genome', + help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', + patterns: ["*.ebwt"] + } + motif_databases: { + description: 'One or more of the MEME suite motif databases (*.meme)', + group: 'reference_genome', + help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', + patterns: ["*.meme"] + } + sample_sraid: { + description: 'One or more sample SRA (Sequence Read Archive) run identifiers', + group: 'input_genomic_data', + help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', + example: 'SRR12345678' + } + sample_fastq: { + description: 'One or more sample FASTQs', + group: 'input_genomic_data', + help: 'Upload zipped FASTQ files.', + patterns: ["*.fq.gz", "*.fastq.gz"] + } + results_name: { + description: 'Experiment results custom name', + group: 'analysis_parameter', + help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', + example: 'AllMerge_mapped' + } + run_motifs: { + description: 'Perform Motif Analysis', + group: 'analysis_parameter', + help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', + example: true + } + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 1 ----------- ### +### ------ Pre-process Analysis Files ------ ### +### ---------------------------------------- ### + + # Process SRRs + if ( defined(sample_sraid) ) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [1] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([sample_sraid, string_sra]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { + input : + sra_id=eachsra, + cloud=false + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if ( !defined(bowtie_index) ) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { + input : + reference=reference + } + } + #2. 
Make sure indexes are six else build indexes + if ( defined(bowtie_index) ) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) + if ( length(int_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { + input : + reference=reference + } + } + } + Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [1] + if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + + #4. Make sure indexes are six else build indexes for Spike-in DNA + if ( defined(spikein_bowtie_index) ) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) + if ( length(int_spikein_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + } + Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input : + reference=reference + } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input : + reference=reference + } + + # Process FASTQs + if ( defined(sample_fastq) ) { + + Array[String] string_fastq = [1] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([sample_fastq, string_fastq]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 1 ---------------- ### +### ----------- B: remove Spike-IN reads ------------ ### +### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + + if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { + input : + inputfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' + } + call util.basicfastqstats as spikein_indv_bfs { + input : + fastqfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + call bowtie.spikein_SE as spikein_indv_map { + input : + fastqfile=eachfastq, + index_files=actual_spikein_bowtie_index, + 
metricsfile=spikein_indv_bfs.metrics_out, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 2 ---------------- ### +### ---- A: analysis if multiple FASTQs provided ---- ### +### ------------------------------------------------- ### + + if ( multi_fastq ) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { + input : + inputfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as indv_bfs { + input : + fastqfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping as indv_mapping { + input : + fastqfile=eachfastq, + index_files=actual_bowtie_index, + metricsfile=indv_bfs.metrics_out, + blacklist=blacklist, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as indv_bamfqc { + input : + inputfile=indv_mapping.sorted_bam, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as indv_runspp { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call bedtools.bamtobed as indv_bamtobed { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call util.evalstats as indv_summarystats { + input: + fastq_type="SEAseq Sample FASTQ", + bambed=indv_bamtobed.bedfile, + sppfile=indv_runspp.spp_out, + fastqczip=indv_fastqc.zipfile, + bamflag=indv_mapping.bam_stats, + rmdupflag=indv_mapping.mkdup_stats, + bkflag=indv_mapping.bklist_stats, + fastqmetrics=indv_bfs.metrics_out, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { + input: + htmlfiles=indv_summarystats.xhtml, + txtfiles=indv_summarystats.textfile, + default_location='SAMPLE', + outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' + } + + call samtools.mergebam { + input: + bamfiles=indv_mapping.sorted_bam, + metricsfiles=indv_bfs.metrics_out, + default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', + outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' + } + + call fastqc.fastqc as mergebamfqc { + input: + 
inputfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' + } + + call samtools.indexstats as mergeindexstats { + input: + bamfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + + if ( defined(blacklist) ) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([blacklist, string_blacklist]) + call bedtools.intersect as merge_rmblklist { + input : + fileA=mergebam.mergebam, + fileB=blacklist_file, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', + nooverlap=true + } + call samtools.indexstats as merge_bklist { + input : + bamfile=merge_rmblklist.intersect_out, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) + + call samtools.markdup as merge_markdup { + input : + bamfile=mergebam_afterbklist, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + + call samtools.indexstats as merge_mkdup { + input : + bamfile=merge_markdup.mkdupbam, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + } # end if length(fastqfiles) > 1: multi_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 2 ----------- ### +### -- B: analysis if one FASTQ provided --- ### +### ---------------------------------------- ### + + # if only one fastqfile is provided + if ( one_fastq ) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { + input : + inputfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as uno_bfs { + input : + fastqfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping { + input : + fastqfile=fastqfiles[0], + index_files=actual_bowtie_index, + metricsfile=uno_bfs.metrics_out, + blacklist=blacklist, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as uno_bamfqc { + input : + inputfile=mapping.sorted_bam, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as uno_runspp { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + + call bedtools.bamtobed as uno_bamtobed { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + } # end if length(fastqfiles) == 1: one_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 3 ----------- ### +### ----------- ChIP-seq analysis ---------- ### +### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate 
correct files for downstream analysis + File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) + + call macs.macs { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="auto", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' + } + + call util.addreadme { + input : + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' + } + + call macs.macs as all { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="all", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' + } + + call macs.macs as nomodel { + input : + bamfile=sample_bam, + nomodel=true, + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' + } + + call bamtogff.bamtogff { + input : + gtffile=gtf, + chromsizes=samtools_faidx.chromsizes, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' + } + + call bedtools.bamtobed as forsicerbed { + input : + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) + } + + call sicer.sicer { + input : + bedfile=forsicerbed.bedfile, + chromsizes=samtools_faidx.chromsizes, + genome_fraction=egs.genomefraction, + fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call rose.rose { + input : + gtffile=gtf, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + bedfile_auto=macs.peakbedfile, + bedfile_all=all.peakbedfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' + } + + call runspp.runspp { + input: + bamfile=sample_bam + } + + call util.peaksanno { + input : + gtffile=gtf, + bedfile=macs.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as all_peaksanno { + input : + gtffile=gtf, + bedfile=all.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=all.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as nomodel_peaksanno { + input : + gtffile=gtf, + bedfile=nomodel.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=nomodel.summitsfile, + 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as sicer_peaksanno { + input : + gtffile=gtf, + bedfile=sicer.scoreisland, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=macs.peakbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call util.flankbed { + input : + bedfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call motifs.motifs as flank { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=flankbed.flankbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + } + + call viz.visualization { + input: + wigfile=macs.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=macs.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizall { + input: + wigfile=all.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=all.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as viznomodel { + input: + wigfile=nomodel.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=nomodel.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizsicer { + input: + wigfile=sicer.wigfile, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call bedtools.bamtobed as finalbed { + input: + bamfile=sample_bam + } + + call sortbed.sortbed { + input: + bedfile=finalbed.bedfile + } + + call bedtools.intersect { + input: + fileA=macs.peakbedfile, + fileB=sortbed.sortbed_out, + countoverlap=true, + sorted=true + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 4 ----------- ### +### ---------- Summary Statistics ---------- ### +### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if ( one_fastq ) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type="SEAseq Sample FASTQ", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), + bamflag=mapping.bam_stats, + rmdupflag=mapping.mkdup_stats, + bkflag=mapping.bklist_stats, + fastqmetrics=uno_bfs.metrics_out, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + 
overallqc_html=uno_summarystats.xhtml, + overallqc_txt=uno_summarystats.textfile + } + } # end if one_fastq + + if ( multi_fastq ) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type="SEAseq Comprehensive", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([mergebamfqc.zipfile, string_qual]), + bamflag=mergeindexstats.flagstats, + rmdupflag=merge_mkdup.flagstats, + bkflag=merge_bklist.flagstats, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html=mergehtml.xhtml, + overallqc_html=merge_summarystats.xhtml, + sampleqc_txt=mergehtml.mergetxt, + overallqc_txt=merge_summarystats.textfile + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? 
scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? g_to_e_enhancers = rose.g_to_e_enhancers + File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? 
a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + + File? s_uno_statsfile = uno_summarystats.statsfile + File? s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + + File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) + File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) + } +} \ No newline at end of file From 030a3e213531fc276f33e34e4aa463157585c6a0 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 4 Oct 2024 15:40:12 -0400 Subject: [PATCH 03/60] fix(wdl-ast/element): typos in macro call --- wdl-ast/src/element.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl-ast/src/element.rs b/wdl-ast/src/element.rs index 369b126be..8dfb99a59 100644 --- a/wdl-ast/src/element.rs +++ b/wdl-ast/src/element.rs @@ -347,8 +347,8 @@ ast_element_impl!( literal_object_item(): LiteralObjectItemNode => LiteralObjectItem => LiteralObjectItem, literal_output(): LiteralOutputNode => LiteralOutput => LiteralOutput, literal_output_item(): LiteralOutputItemNode => LiteralOutputItem => LiteralOutputItem, - literal_pair(): LiteralStringNode => LiteralPair => LiteralPair, - literal_string(): LiteralStringText => LiteralString => LiteralString, + literal_pair(): LiteralPairNode => LiteralPair => LiteralPair, + literal_string(): LiteralStringNode => LiteralString => LiteralString, literal_struct(): LiteralStructNode => LiteralStruct => LiteralStruct, literal_struct_item(): LiteralStructItemNode => LiteralStructItem => LiteralStructItem, logical_and_expr(): LogicalAndExprNode => LogicalAndExpr => LogicalAndExpr, From 067e89f0871a98cd5ced369c0125be59117070e9 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 4 Oct 2024 15:41:05 -0400 Subject: [PATCH 04/60] [WIP] --- wdl-format/src/v1/workflow/call.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index 8cc06e5f1..9c794f722 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -44,9 +44,20 @@ pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream

) {
    let mut children = element.children_by_kind();

-    if let Some(mut idents) = children.remove(&SyntaxKind::Ident) {
-        let keyword = exactly_one!(idents, "idents");
-        (&keyword).write(stream);
+    if let Some(idents) = children.remove(&SyntaxKind::Ident) {
+        let mut idents = idents.into_iter();
+        let first_ident = idents.next().expect("at least one ident");
+        (&first_ident).write(stream);
+
+        if let Some(mut dots) = children.remove(&SyntaxKind::Dot) {
+            let dot = exactly_one!(dots, "dots");
+            (&dot).write(stream);
+
+            let second_ident = idents.next().expect("second ident");
+            (&second_ident).write(stream);
+
+            assert!(idents.next().is_none(), "too many idents");
+        }
    }

    if !children.is_empty() {

From 03a0a6e59cbd721bc6a89273c1691b91a7d6c890 Mon Sep 17 00:00:00 2001
From: Clay McLeod
Date: Fri, 4 Oct 2024 15:38:55 -0500
Subject: [PATCH 05/60] revise: `s/preceeding/preceding/g`

---
 wdl-format/src/element.rs | 38 +++++++++++++++++++-------------------
 wdl-format/src/lib.rs     |  4 ++--
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs
index ac48f901e..7857134dd 100644
--- a/wdl-format/src/element.rs
+++ b/wdl-format/src/element.rs
@@ -9,10 +9,10 @@ use wdl_ast::Element;
 use wdl_ast::Node;
 use wdl_ast::SyntaxKind;

-use crate::NEWLINE;
 use crate::PreToken;
 use crate::TokenStream;
 use crate::Writable;
+use crate::NEWLINE;

 pub mod node;

@@ -21,17 +21,17 @@ pub mod node;
 /// Trivia would be things like comments and whitespace.
 #[derive(Clone, Debug, Default)]
 pub struct Trivia {
-    /// Any preceeding trivia.
-    preceeding: Option<NonEmpty<Box<FormatElement>>>,
+    /// Any preceding trivia.
+    preceding: Option<NonEmpty<Box<FormatElement>>>,

     /// Any inline trivia.
     inline: Option<NonEmpty<Box<FormatElement>>>,
 }

 impl Trivia {
-    /// Any preceeding trivia that are not whitespaces.
-    pub fn preceeding(&self) -> Option<impl Iterator<Item = &FormatElement>> {
-        self.preceeding.as_ref().map(|trivia| {
+    /// Any preceding trivia that are not whitespaces.
+    pub fn preceding(&self) -> Option<impl Iterator<Item = &FormatElement>> {
+        self.preceding.as_ref().map(|trivia| {
             trivia
                 .into_iter()
                 .filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace))
@@ -119,9 +119,9 @@ impl FormatElement {
         results
     }

-    /// Writes any preceeding trivia to the stream.
-    pub fn write_preceeding_trivia(&self, stream: &mut TokenStream<PreToken>) {
-        if let Some(trivia) = self.trivia().preceeding() {
+    /// Writes any preceding trivia to the stream.
+    pub fn write_preceding_trivia(&self, stream: &mut TokenStream<PreToken>) {
+        if let Some(trivia) = self.trivia().preceding() {
             for t in trivia.filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) {
                 t.write(stream);
             }
@@ -202,7 +202,7 @@ fn collate(node: &Node) -> Option<NonEmpty<Box<FormatElement>>> {
         .peekable();

     while stream.peek().is_some() {
-        let preceeding = collect_optional(
+        let preceding = collect_optional(
             take_while_peek(stream.by_ref(), |node| node.is_trivia())
                 .map(|item| Box::new(item.into_format_element())),
         );
@@ -251,7 +251,7 @@
         results.push(Box::new(FormatElement {
             element,
-            trivia: Trivia { preceeding, inline },
+            trivia: Trivia { preceding, inline },
             children,
         }));
     }
@@ -311,7 +311,7 @@ workflow bar # This is an inline comment on the workflow ident.
             SyntaxKind::VersionStatementNode
         );

-        assert!(version.trivia().preceeding().is_none());
+        assert!(version.trivia().preceding().is_none());
         assert!(version.trivia().inline().is_none());

         let mut version_children = version.children().unwrap();
@@ -336,9 +336,9 @@ workflow bar # This is an inline comment on the workflow ident.

        // Preceeding.
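        // (For orientation in these tests: "preceding" trivia is any comment
        // or blank line on the lines before an element, while "inline"
        // trivia is a comment trailing the element on the same line. In the
        // snippet this test parses, `# This is a comment attached to the
        // task.` is preceding trivia for the task, and `# This is an inline
        // comment on the task.` is its inline trivia.)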
-        let mut preceeding = task.trivia().preceeding().unwrap();
+        let mut preceding = task.trivia().preceding().unwrap();

-        let comment = preceeding
+        let comment = preceding
             .next()
             .unwrap()
             .element()
             .syntax()
             .into_token()
             .unwrap();
@@ -413,9 +413,9 @@ workflow bar # This is an inline comment on the workflow ident.

         // Preceeding.

-        let mut preceeding = workflow.trivia().preceeding().unwrap();
+        let mut preceding = workflow.trivia().preceding().unwrap();

-        let comment = preceeding
+        let comment = preceding
             .next()
             .unwrap()
             .element()
             .syntax()
             .into_token()
             .unwrap();
@@ -484,9 +484,9 @@ workflow bar # This is an inline comment on the workflow ident.
         let call = workflow_children.next().unwrap();
         assert_eq!(call.element().kind(), SyntaxKind::CallStatementNode);

-        let mut call_preceeding = call.trivia().preceeding().unwrap();
+        let mut call_preceding = call.trivia().preceding().unwrap();

-        let comment = call_preceeding
+        let comment = call_preceding
             .next()
             .unwrap()
             .element()
             .syntax()
             .into_token()
             .unwrap();
@@ -496,7 +496,7 @@ workflow bar # This is an inline comment on the workflow ident.
         assert_eq!(comment.kind(), SyntaxKind::Comment);
         assert_eq!(comment.text(), "# This is attached to the call.");

-        assert!(call_preceeding.next().is_none());
+        assert!(call_preceding.next().is_none());

         assert_eq!(
             workflow_children.next().unwrap().element().kind(),
diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs
index c862ac472..db36595b5 100644
--- a/wdl-format/src/lib.rs
+++ b/wdl-format/src/lib.rs
@@ -47,7 +47,7 @@ pub trait Writable {

 impl Writable for &FormatElement {
     fn write(&self, stream: &mut TokenStream<PreToken>) {
-        self.write_preceeding_trivia(stream);
+        self.write_preceding_trivia(stream);

         match self.element() {
             Element::Node(node) => match node {
@@ -197,8 +197,8 @@ mod tests {
     use wdl_ast::Document;
     use wdl_ast::Node;

-    use crate::Formatter;
     use crate::element::node::AstNodeFormatExt as _;
+    use crate::Formatter;

     #[test]
     fn smoke() {

From 006456930952065c65010df7c4ec8a3491270248 Mon Sep 17 00:00:00 2001
From: Clay McLeod
Date: Fri, 4 Oct 2024 15:40:08 -0500
Subject: [PATCH 06/60] revise: updates default indent size

---
 wdl-format/src/config/indent.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wdl-format/src/config/indent.rs b/wdl-format/src/config/indent.rs
index 528f3858c..2b109adac 100644
--- a/wdl-format/src/config/indent.rs
+++ b/wdl-format/src/config/indent.rs
@@ -5,7 +5,7 @@ use std::sync::LazyLock;

 /// The default indentation.
 pub static DEFAULT_INDENT: LazyLock<Indent> =
-    LazyLock::new(|| Indent::Spaces(NonZeroUsize::new(2).unwrap()));
+    LazyLock::new(|| Indent::Spaces(NonZeroUsize::new(4).unwrap()));

 /// An indentation level.
#[derive(Clone, Copy, Debug)] From bd0591f65b1446a39707318d72db04e93364dadf Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Sat, 5 Oct 2024 10:53:51 -0400 Subject: [PATCH 07/60] [WIP]revise: rework trivia --- wdl-ast/src/lib.rs | 1 + wdl-format/src/element.rs | 206 +-------------------------------- wdl-format/src/element/node.rs | 2 +- wdl-format/src/lib.rs | 6 +- wdl-format/src/token/pre.rs | 29 ++++- 5 files changed, 36 insertions(+), 208 deletions(-) diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs index cd92148af..99bdcbece 100644 --- a/wdl-ast/src/lib.rs +++ b/wdl-ast/src/lib.rs @@ -49,6 +49,7 @@ pub use wdl_grammar::Severity; pub use wdl_grammar::Span; pub use wdl_grammar::SupportedVersion; pub use wdl_grammar::SyntaxElement; +pub use wdl_grammar::SyntaxExt; pub use wdl_grammar::SyntaxKind; pub use wdl_grammar::SyntaxNode; pub use wdl_grammar::SyntaxToken; diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index 7857134dd..24b4ddbf3 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -4,19 +4,13 @@ use std::collections::HashMap; use std::iter::Peekable; use nonempty::NonEmpty; -use wdl_ast::AstToken as _; use wdl_ast::Element; use wdl_ast::Node; use wdl_ast::SyntaxKind; -use crate::PreToken; -use crate::TokenStream; -use crate::Writable; -use crate::NEWLINE; - pub mod node; -/// Trivia associated with some more formidable element. +/// Trivia associated with a token. /// /// Trivia would be things like comments and whitespace. #[derive(Clone, Debug, Default)] @@ -56,25 +50,14 @@ pub struct FormatElement { /// The inner element. element: Element, - /// Trivia associated with the element. - trivia: Trivia, - /// Children as format elements. children: Option>>, } impl FormatElement { /// Creates a new [`FormatElement`]. - pub fn new( - element: Element, - trivia: Trivia, - children: Option>>, - ) -> Self { - Self { - element, - trivia, - children, - } + pub fn new(element: Element, children: Option>>) -> Self { + Self { element, children } } /// Gets the inner element. @@ -82,11 +65,6 @@ impl FormatElement { &self.element } - /// Gets the trivia. - pub fn trivia(&self) -> &Trivia { - &self.trivia - } - /// Gets the children for this node. pub fn children(&self) -> Option> { self.children @@ -118,24 +96,6 @@ impl FormatElement { results } - - /// Writes any preceding trivia to the stream. - pub fn write_preceding_trivia(&self, stream: &mut TokenStream) { - if let Some(trivia) = self.trivia().preceding() { - for t in trivia.filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) { - t.write(stream); - } - } - } - - /// Writes any inline trivia to the stream. - pub fn write_inline_trivia(&self, stream: &mut TokenStream) { - if let Some(trivia) = self.trivia().inline() { - for t in trivia.filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) { - t.write(stream); - } - } - } } /// An extension trait for formatting [`Element`]s. 
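The hunks above strip `Trivia` out of `FormatElement` itself: trivia is no longer pre-collated onto elements, and is instead rediscovered from the syntax tree when each token is written. The policy this rework converges on (formalized in `gather_substantial_trivia` in a later patch below) keeps comments unconditionally and keeps a whitespace run only when it amounts to a blank line. A self-contained sketch of that rule; the helper name and shape here are illustrative, not the crate's API:

    /// Returns whether a piece of trivia should survive formatting under the
    /// rule described above: comments always do, while whitespace does only
    /// when it contains more than one newline (i.e., at least one blank line).
    fn is_substantial(text: &str, is_comment: bool) -> bool {
        is_comment || text.chars().filter(|c| *c == '\n').count() > 1
    }

    fn main() {
        assert!(is_substantial("# a comment", true)); // comments are always kept
        assert!(!is_substantial("\n", false)); // a single newline is dropped
        assert!(is_substantial("\n\n  \n", false)); // a blank line is preserved
    }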
@@ -154,7 +114,7 @@ impl AstElementFormatExt for Element { Element::Token(_) => None, }; - FormatElement::new(self, Default::default(), children) + FormatElement::new(self, children) } } @@ -202,58 +162,17 @@ fn collate(node: &Node) -> Option>> { .peekable(); while stream.peek().is_some() { - let preceding = collect_optional( - take_while_peek(stream.by_ref(), |node| node.is_trivia()) - .map(|item| Box::new(item.into_format_element())), - ); - let element = match stream.next() { Some(node) => node, None => break, }; - let inline = collect_optional( - take_while_peek(stream.by_ref(), |element| { - if element.is_trivia() { - // If the element is trivia, we need to check if it contains a - // newline. - match element { - Element::Node(_) => { - // SAFETY: if this is reached, then the code needs to be - // altered. The fact that nodes should not be trivia is - // not baked into the code per se, but it's not expected - // to ever occur. If this ends up happening and it makes - // sense to change this, feel free to do so. - unreachable!("nodes should not be trivia") - } - Element::Token(token) => { - // NOTE: if the token _is_ whitespace, then return false - // only if the token contains a newline. Else, this - // should continue consuming the whitespace. - token - .as_whitespace() - .map(|whitespace| !whitespace.syntax().text().contains(NEWLINE)) - .unwrap_or(true) - } - } - } else { - // If the element isn't trivia, we don't consume it. - false - } - }) - .map(|item| Box::new(item.into_format_element())), - ); - let children = match element { Element::Node(ref node) => collate(node), Element::Token(_) => None, }; - results.push(Box::new(FormatElement { - element, - trivia: Trivia { preceding, inline }, - children, - })); + results.push(Box::new(FormatElement { element, children })); } if !results.is_empty() { @@ -311,9 +230,6 @@ workflow bar # This is an inline comment on the workflow ident. SyntaxKind::VersionStatementNode ); - assert!(version.trivia().preceding().is_none()); - assert!(version.trivia().inline().is_none()); - let mut version_children = version.children().unwrap(); assert_eq!( version_children.next().unwrap().element().kind(), @@ -334,36 +250,6 @@ workflow bar # This is an inline comment on the workflow ident. SyntaxKind::TaskDefinitionNode ); - // Preceeding. - - let mut preceding = task.trivia().preceding().unwrap(); - - let comment = preceding - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(comment.kind(), SyntaxKind::Comment); - assert_eq!(comment.text(), "# This is a comment attached to the task."); - - // Inline. - - let mut inline = task.trivia().inline().unwrap(); - - let comment = inline - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(comment.kind(), SyntaxKind::Comment); - assert_eq!(comment.text(), "# This is an inline comment on the task."); - - assert!(inline.next().is_none()); - // Children. let mut task_children = task.children().unwrap(); @@ -375,21 +261,6 @@ workflow bar # This is an inline comment on the workflow ident. let ident = task_children.next().unwrap(); assert_eq!(ident.element().kind(), SyntaxKind::Ident); - let mut ident_inline = ident.trivia().inline().unwrap(); - - let inline_comment = ident_inline - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(inline_comment.kind(), SyntaxKind::Comment); - assert_eq!( - inline_comment.text(), - "# This is an inline comment on the task ident." 
- ); - assert_eq!( task_children.next().unwrap().element().kind(), SyntaxKind::OpenBrace @@ -411,42 +282,6 @@ workflow bar # This is an inline comment on the workflow ident. SyntaxKind::WorkflowDefinitionNode ); - // Preceeding. - - let mut preceding = workflow.trivia().preceding().unwrap(); - - let comment = preceding - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(comment.kind(), SyntaxKind::Comment); - assert_eq!( - comment.text(), - "# This is a comment attached to the workflow." - ); - - // Inline. - - let mut inline = workflow.trivia().inline().unwrap(); - - let comment = inline - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(comment.kind(), SyntaxKind::Comment); - assert_eq!( - comment.text(), - "# This is an inline comment on the workflow." - ); - - assert!(inline.next().is_none()); - // Children. let mut workflow_children = workflow.children().unwrap(); @@ -459,23 +294,6 @@ workflow bar # This is an inline comment on the workflow ident. let ident = workflow_children.next().unwrap(); assert_eq!(ident.element().kind(), SyntaxKind::Ident); - let mut ident_inline = ident.trivia().inline().unwrap(); - - let inline_comment = ident_inline - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(inline_comment.kind(), SyntaxKind::Comment); - assert_eq!( - inline_comment.text(), - "# This is an inline comment on the workflow ident." - ); - - assert!(ident_inline.next().is_none()); - assert_eq!( workflow_children.next().unwrap().element().kind(), SyntaxKind::OpenBrace @@ -484,20 +302,6 @@ workflow bar # This is an inline comment on the workflow ident. let call = workflow_children.next().unwrap(); assert_eq!(call.element().kind(), SyntaxKind::CallStatementNode); - let mut call_preceding = call.trivia().preceding().unwrap(); - - let comment = call_preceding - .next() - .unwrap() - .element() - .syntax() - .into_token() - .unwrap(); - assert_eq!(comment.kind(), SyntaxKind::Comment); - assert_eq!(comment.text(), "# This is attached to the call."); - - assert!(call_preceding.next().is_none()); - assert_eq!( workflow_children.next().unwrap().element().kind(), SyntaxKind::CloseBrace diff --git a/wdl-format/src/element/node.rs b/wdl-format/src/element/node.rs index 0e96bb689..9ef74309b 100644 --- a/wdl-format/src/element/node.rs +++ b/wdl-format/src/element/node.rs @@ -18,6 +18,6 @@ impl AstNodeFormatExt for Node { Self: Sized, { let children = collate(&self); - FormatElement::new(Element::Node(self), Default::default(), children) + FormatElement::new(Element::Node(self), children) } } diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index db36595b5..98c40560c 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -47,8 +47,6 @@ pub trait Writable { impl Writable for &FormatElement { fn write(&self, stream: &mut TokenStream) { - self.write_preceding_trivia(stream); - match self.element() { Element::Node(node) => match node { AstNode::AccessExpr(_) => todo!(), @@ -144,8 +142,6 @@ impl Writable for &FormatElement { stream.push_ast_token(token); } } - - self.write_inline_trivia(stream); } } @@ -197,8 +193,8 @@ mod tests { use wdl_ast::Document; use wdl_ast::Node; - use crate::element::node::AstNodeFormatExt as _; use crate::Formatter; + use crate::element::node::AstNodeFormatExt as _; #[test] fn smoke() { diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index d9f8916c2..36d730ca9 100644 --- a/wdl-format/src/token/pre.rs +++ 
b/wdl-format/src/token/pre.rs @@ -1,5 +1,6 @@ //! Tokens emitted during the formatting of particular elements. +use wdl_ast::SyntaxExt; use wdl_ast::SyntaxKind; use crate::Token; @@ -62,10 +63,36 @@ impl TokenStream { } /// Pushes an AST token into the stream. + /// + /// This will also push any preceding or inline trivia into the stream. + /// Any token may have preceding or inline trivia, unless that token is + /// itself trivia (i.e. trivia cannot have trivia). pub fn push_ast_token(&mut self, token: &wdl_ast::Token) { let syntax = token.syntax(); - let token = PreToken::Literal(syntax.text().to_owned(), syntax.kind()); + let kind = syntax.kind(); + let mut inline_comment = None; + if !kind.is_trivia() { + let preceding_trivia = syntax.preceding_trivia(); + for raw_trivia in preceding_trivia { + let trivia = match raw_trivia.as_str() { + "\n" => PreToken::Literal(raw_trivia, SyntaxKind::Whitespace), + _ => PreToken::Literal(raw_trivia.to_owned(), SyntaxKind::Comment), + }; + self.0.push(trivia); + } + if let Some(raw_inline_comment) = syntax.inline_comment() { + inline_comment = Some(PreToken::Literal( + raw_inline_comment.to_owned(), + SyntaxKind::Comment, + )); + } + } + let token = PreToken::Literal(syntax.text().to_owned(), kind); self.0.push(token); + + if let Some(inline_comment) = inline_comment { + self.0.push(inline_comment); + } } /// Gets an iterator of references to each token in the stream. From fd4da2e7fb480e988d37163a39273def3b141ccd Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Sat, 5 Oct 2024 14:30:05 -0400 Subject: [PATCH 08/60] [WIP]revise: trivia rework --- wdl-format/src/element.rs | 8 ++- wdl-format/src/token/pre.rs | 17 ++++-- wdl-grammar/src/tree.rs | 107 ++++++++++++++++++------------------ 3 files changed, 70 insertions(+), 62 deletions(-) diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index 24b4ddbf3..7095f515e 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -158,7 +158,13 @@ fn collate(node: &Node) -> Option>> { let mut stream = node .syntax() .children_with_tokens() - .map(Element::cast) + .filter_map(|syntax| { + if syntax.kind().is_trivia() { + None + } else { + Some(Element::cast(syntax)) + } + }) .peekable(); while stream.peek().is_some() { diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 36d730ca9..851e93d3c 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -73,16 +73,21 @@ impl TokenStream { let mut inline_comment = None; if !kind.is_trivia() { let preceding_trivia = syntax.preceding_trivia(); - for raw_trivia in preceding_trivia { - let trivia = match raw_trivia.as_str() { - "\n" => PreToken::Literal(raw_trivia, SyntaxKind::Whitespace), - _ => PreToken::Literal(raw_trivia.to_owned(), SyntaxKind::Comment), + for token in preceding_trivia { + let trivia = match token.kind() { + SyntaxKind::Whitespace => { + PreToken::Literal(String::from("\n"), SyntaxKind::Whitespace) + } + SyntaxKind::Comment => { + PreToken::Literal(token.text().to_owned(), SyntaxKind::Comment) + } + _ => unreachable!("unexpected trivia: {:?}", token), }; self.0.push(trivia); } - if let Some(raw_inline_comment) = syntax.inline_comment() { + if let Some(token) = syntax.inline_comment() { inline_comment = Some(PreToken::Literal( - raw_inline_comment.to_owned(), + token.text().to_owned(), SyntaxKind::Comment, )); } diff --git a/wdl-grammar/src/tree.rs b/wdl-grammar/src/tree.rs index bc473d6ec..e59c1ff42 100644 --- a/wdl-grammar/src/tree.rs +++ b/wdl-grammar/src/tree.rs @@ -718,26 
+718,33 @@ impl fmt::Debug for SyntaxTree { } } -/// Gathers comments and blank lines from a [`SyntaxExt`]. -fn gather_trivia( +/// Gathers substantial trivia (comments and blank lines) from a [`SyntaxExt`]. +/// +/// Whitespace is considered substantial if it contains more than one newline. +/// Comments are always considered substantial. +fn gather_substantial_trivia( source: &T, direction: Direction, break_on_newline: bool, -) -> Box<[String]> { +) -> Box<[SyntaxToken]> { let iter = source.siblings_with_tokens(direction); - /// Adds the text to the currently collecting buffer in the right place + /// Adds the token to the currently collecting buffer in the right place /// depending in the direction we are traversing. - fn push_results(text: String, results: &mut VecDeque, direction: &Direction) { + fn push_results( + token: SyntaxToken, + results: &mut VecDeque, + direction: &Direction, + ) { match direction { - Direction::Next => results.push_back(text), - Direction::Prev => results.push_front(text), + Direction::Next => results.push_back(token), + Direction::Prev => results.push_front(token), } } - let comments = iter + let trivia = iter .skip_while(|e| source.matches(e)) - .take_while(|e| matches!(e.kind(), SyntaxKind::Comment | SyntaxKind::Whitespace)) + .take_while(|e| e.kind().is_trivia()) .fold_while(VecDeque::new(), |mut results, e| { match e.kind() { SyntaxKind::Comment => { @@ -751,7 +758,7 @@ fn gather_trivia( .clone() .into_token() .expect("whitespace should always be a token") - .to_string() + .text() .chars() .filter(|c| *c == '\n') .count(); @@ -771,30 +778,20 @@ fn gather_trivia( } } - let text = e - .into_token() - .expect("comment should always be a token") - .to_string() - .trim_end() - .to_string(); + let comment = e.into_token().expect("comment should always be a token"); - push_results(text, &mut results, &direction); + push_results(comment, &mut results, &direction); } SyntaxKind::Whitespace => { - let newlines = e - .into_token() - .expect("whitespace should always be a token") - .to_string() - .chars() - .filter(|c| *c == '\n') - .count(); + let token = e.into_token().expect("whitespace should always be a token"); + let newlines = token.text().chars().filter(|c| *c == '\n').count(); if break_on_newline && newlines > 0 { return FoldWhile::Done(results); } if newlines > 1 { - push_results("\n".to_string(), &mut results, &direction) + push_results(token, &mut results, &direction) } } // SAFETY: we just filtered out any non-comment and @@ -809,7 +806,7 @@ fn gather_trivia( // NOTE: most of the time, this conversion will be O(1). Occassionally // it will be O(n). No allocations will ever be done. Thus, the // ammortized cost of this is quite cheap. - Vec::from(comments).into_boxed_slice() + Vec::from(trivia).into_boxed_slice() } /// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and @@ -877,29 +874,29 @@ pub trait SyntaxExt { results.into_boxed_slice() } - /// Gets all of the preceding comments for an element. - fn preceding_trivia(&self) -> Box<[String]> + /// Gets all of the substantial preceding trivia for an element. + fn preceding_trivia(&self) -> Box<[SyntaxToken]> where Self: Sized, { - gather_trivia(self, Direction::Prev, false) + gather_substantial_trivia(self, Direction::Prev, false) } - /// Gets all of the succeeding comments for an element. - fn succeeding_comments(&self) -> Box<[String]> + /// Gets all of the substantial succeeding trivia for an element. 
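+    ///
+    /// As with `preceding_trivia`, only substantial trivia is returned:
+    /// comments always qualify, while whitespace qualifies only when it
+    /// contains more than one newline.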
+ fn succeeding_trivia(&self) -> Box<[SyntaxToken]> where Self: Sized, { - gather_trivia(self, Direction::Next, false) + gather_substantial_trivia(self, Direction::Next, false) } /// Get any inline comment directly following an element on the /// same line. - fn inline_comment(&self) -> Option + fn inline_comment(&self) -> Option where Self: Sized, { - gather_trivia(self, Direction::Next, true) + gather_substantial_trivia(self, Direction::Next, true) // NOTE: at most, there can be one contiguous comment on a line. .first() .cloned() @@ -965,7 +962,7 @@ task foo {} # This comment should not be included # comments # are # long - + # Others are short # and, yet another @@ -978,17 +975,18 @@ workflow foo {} # This should not be collected. let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - assert_eq!(workflow.preceding_trivia().as_ref(), vec![ - "\n", - "# Some", - "# comments", - "# are", - "# long", - "\n", - "# Others are short", - "\n", - "# and, yet another" - ]); + let trivia = workflow.preceding_trivia(); + let mut trivia_iter = trivia.iter(); + assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); + assert_eq!(trivia_iter.next().unwrap().text(), "# Some"); + assert_eq!(trivia_iter.next().unwrap().text(), "# comments"); + assert_eq!(trivia_iter.next().unwrap().text(), "# are"); + assert_eq!(trivia_iter.next().unwrap().text(), "# long"); + assert_eq!(trivia_iter.next().unwrap().text(), "\n \n"); + assert_eq!(trivia_iter.next().unwrap().text(), "# Others are short"); + assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); + assert_eq!(trivia_iter.next().unwrap().text(), "# and, yet another"); + assert!(trivia_iter.next().is_none()); } #[test] @@ -1009,11 +1007,12 @@ workflow foo {} # Here is a comment that should be collected. let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - assert_eq!(workflow.succeeding_comments().as_ref(), vec![ - "# Here is a comment that should be collected.", - "\n", - "# This comment should be included too." - ]); + let trivia = workflow.succeeding_trivia(); + let mut trivia_iter = trivia.iter(); + assert_eq!(trivia_iter.next().unwrap().text(), "# Here is a comment that should be collected."); + assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); + assert_eq!(trivia_iter.next().unwrap().text(), "# This comment should be included too."); + assert!(trivia_iter.next().is_none()); } #[test] @@ -1034,9 +1033,7 @@ workflow foo {} # Here is a comment that should be collected. 
let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - assert_eq!( - workflow.inline_comment().as_deref(), - Some("# Here is a comment that should be collected.") - ); + let comment = workflow.inline_comment().unwrap(); + assert_eq!(comment.text(), "# Here is a comment that should be collected."); } } From e12891372373e43546e5dbb13dd8dc6904906e48 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Sat, 5 Oct 2024 20:20:51 -0400 Subject: [PATCH 09/60] chore: cargo fmt --- wdl-format/src/v1/workflow/call.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index 9c794f722..6bdb92ae2 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -55,7 +55,7 @@ pub fn format_call_target(element: &FormatElement, stream: &mut TokenStream Date: Sun, 6 Oct 2024 18:35:30 -0400 Subject: [PATCH 10/60] [WIP] --- wdl-ast/src/lib.rs | 1 + wdl-format/src/element.rs | 82 +--------- wdl-format/src/lib.rs | 25 ++- wdl-format/src/token/post.rs | 121 ++++++++++++--- wdl-format/src/token/pre.rs | 113 +++++++++++--- wdl-format/src/v1.rs | 10 +- wdl-format/src/v1/task.rs | 16 +- wdl-format/src/v1/workflow.rs | 11 ++ wdl-format/src/v1/workflow/call.rs | 8 + wdl-grammar/src/tree.rs | 236 +++++++++++++++++------------ 10 files changed, 395 insertions(+), 228 deletions(-) diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs index 99bdcbece..c279133f5 100644 --- a/wdl-ast/src/lib.rs +++ b/wdl-ast/src/lib.rs @@ -53,6 +53,7 @@ pub use wdl_grammar::SyntaxExt; pub use wdl_grammar::SyntaxKind; pub use wdl_grammar::SyntaxNode; pub use wdl_grammar::SyntaxToken; +pub use wdl_grammar::SyntaxTokenExt; pub use wdl_grammar::SyntaxTree; pub use wdl_grammar::ToSpan; pub use wdl_grammar::WorkflowDescriptionLanguage; diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index 7095f515e..e3449ddb4 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -1,7 +1,6 @@ //! Elements used during formatting. use std::collections::HashMap; -use std::iter::Peekable; use nonempty::NonEmpty; use wdl_ast::Element; @@ -10,40 +9,6 @@ use wdl_ast::SyntaxKind; pub mod node; -/// Trivia associated with a token. -/// -/// Trivia would be things like comments and whitespace. -#[derive(Clone, Debug, Default)] -pub struct Trivia { - /// Any preceding trivia. - preceding: Option>>, - - /// Any inline trivia. - inline: Option>>, -} - -impl Trivia { - /// Any preceding trivia that are not whitespaces. - pub fn preceding(&self) -> Option> { - self.preceding.as_ref().map(|trivia| { - trivia - .into_iter() - .filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) - .map(|t| &**t) - }) - } - - /// Any inline trivia that are not whitespaces. - pub fn inline(&self) -> Option> { - self.inline.as_ref().map(|trivia| { - trivia - .into_iter() - .filter(|t| !matches!(t.element().kind(), SyntaxKind::Whitespace)) - .map(|t| &**t) - }) - } -} - /// A formattable element. #[derive(Clone, Debug)] pub struct FormatElement { @@ -118,40 +83,6 @@ impl AstElementFormatExt for Element { } } -/// Collects a list of iterables into an [`Option`]. -fn collect_optional(mut iter: impl Iterator) -> Option> { - if let Some(first) = iter.next() { - let mut vec = NonEmpty::new(first); - vec.extend(iter); - Some(vec) - } else { - None - } -} - -/// Takes elements while a particular predicate is true _without_ consuming the -/// element that breaks the chain. 
-fn take_while_peek<'a, I, P>( - iter: &'a mut Peekable, - predicate: P, -) -> impl Iterator + 'a -where - I: Iterator, - P: Fn(&I::Item) -> bool + 'a, -{ - std::iter::from_fn(move || { - if let Some(next_item) = iter.peek() { - if predicate(next_item) { - iter.next() - } else { - None - } - } else { - None - } - }) -} - /// Collates the children of a particular node. fn collate(node: &Node) -> Option>> { let mut results = Vec::new(); @@ -204,20 +135,21 @@ mod tests { #[test] fn smoke() { let (document, diagnostics) = Document::parse( - "version 1.2 + "## WDL +version 1.2 # This is a comment attached to the version. -# This is a comment attached to the task. +# This is a comment attached to the task keyword. task foo # This is an inline comment on the task ident. { -} # This is an inline comment on the task. +} # This is an inline comment on the task close brace. -# This is a comment attached to the workflow. +# This is a comment attached to the workflow keyword. workflow bar # This is an inline comment on the workflow ident. { - # This is attached to the call. + # This is attached to the call keyword. call foo {} -} # This is an inline comment on the workflow.", +} # This is an inline comment on the workflow close brace.", ); assert!(diagnostics.is_empty()); diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index 98c40560c..255641a74 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -199,25 +199,34 @@ mod tests { #[test] fn smoke() { let (document, diagnostics) = Document::parse( - "version 1.2 + "## WDL +version 1.2 # This is a comment attached to the version. -# This is a comment attached to the task. +# This is a comment attached to the task keyword. task foo # This is an inline comment on the task ident. { -} # This is an inline comment on the task. +} # This is an inline comment on the task close brace. -# This is a comment attached to the workflow. +# This is a comment attached to the workflow keyword. workflow bar # This is an inline comment on the workflow ident. { - # This is attached to the call. + # This is attached to the call keyword. call foo {} -} # This is an inline comment on the workflow.", +} # This is an inline comment on the workflow close brace.", ); assert!(diagnostics.is_empty()); let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element(); - let stream = Formatter::default().to_stream(&document).to_string(); - println!("{stream}"); + let formatter = Formatter::default(); + let result = formatter.format(&document); + match result { + Ok(s) => { + print!("{}", s); + } + Err(err) => { + panic!("failed to format document: {}", err); + } + } } } diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index 20b7f2971..c67f2e900 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -5,6 +5,7 @@ use wdl_ast::SyntaxKind; +use crate::CommentKind; use crate::NEWLINE; use crate::PreToken; use crate::SPACE; @@ -25,6 +26,9 @@ pub enum PostToken { /// A newline. Newline, + /// One indentation. + Indent, + /// A string literal. 
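The helper deleted above yielded items while a predicate held, *without* consuming the first item that breaks the chain, so the caller's `Peekable` still held the breaking element. The same behavior falls out of std's `Peekable::next_if`; a self-contained sketch, with plain integers standing in for syntax elements:

use std::iter::Peekable;

/// Yields items while `predicate` accepts them, leaving the breaking item
/// unconsumed in the underlying `Peekable`.
fn take_while_peek<'a, I>(
    iter: &'a mut Peekable<I>,
    predicate: impl Fn(&I::Item) -> bool + 'a,
) -> impl Iterator<Item = I::Item> + 'a
where
    I: Iterator,
{
    // `next_if` consumes the next item only when the predicate accepts it.
    std::iter::from_fn(move || iter.next_if(&predicate))
}

fn main() {
    let mut nums = [1, 2, 7, 3].into_iter().peekable();
    let prefix: Vec<_> = take_while_peek(&mut nums, |n| *n < 5).collect();
    assert_eq!(prefix, vec![1, 2]);
    assert_eq!(nums.next(), Some(7)); // the breaking element survives
}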
Literal(String), } @@ -34,6 +38,7 @@ impl std::fmt::Debug for PostToken { match self { Self::Space => write!(f, ""), Self::Newline => write!(f, ""), + Self::Indent => write!(f, ""), Self::Literal(value) => write!(f, " {value}"), } } @@ -44,6 +49,7 @@ impl std::fmt::Display for PostToken { match self { PostToken::Space => write!(f, "{SPACE}"), PostToken::Newline => write!(f, "{NEWLINE}"), + PostToken::Indent => write!(f, " "), // 4 spaces TODO replace PostToken::Literal(value) => write!(f, "{value}"), } } @@ -51,10 +57,10 @@ impl std::fmt::Display for PostToken { impl Token for PostToken {} -/// The state of the postprocessor. +/// Current position in a line. #[derive(Default, Eq, PartialEq)] -enum State { - /// The start of a line in the document. +enum LinePosition { + /// The start of a line. #[default] StartOfLine, @@ -64,7 +70,13 @@ enum State { /// A postprocessor of [tokens](PreToken). #[derive(Default)] -pub struct Postprocessor(State); +pub struct Postprocessor { + /// The current position in the line. + position: LinePosition, + + /// The current indentation level. + indent_level: usize, +} impl Postprocessor { /// Runs the postprocessor. @@ -75,7 +87,8 @@ impl Postprocessor { self.step(token, &mut output) } - output.trim_while(|token| matches!(token, PostToken::Space | PostToken::Newline)); + self.trim_whitespace(&mut output); + output.push(PostToken::Newline); output.push(PostToken::Newline); output @@ -85,34 +98,92 @@ impl Postprocessor { /// [`PostToken`]s. pub fn step(&mut self, token: PreToken, stream: &mut TokenStream) { match token { - PreToken::SectionSpacer => { - if self.0 != State::StartOfLine { - self.newline(stream) - } + PreToken::BlankLine => { + self.trim_whitespace(stream); + stream.push(PostToken::Newline); + stream.push(PostToken::Newline); + } + PreToken::LineEnd => { + self.end_line(stream); + } + PreToken::WordEnd => { + stream.trim_end(&PostToken::Space); - self.newline(stream); + if self.position == LinePosition::MiddleOfLine { + stream.push(PostToken::Space); + } else { + // We're at the start of a line, so we don't need to add a + // space. + } + } + PreToken::IndentStart => { + self.indent_level += 1; + self.end_line(stream); + } + PreToken::IndentEnd => { + self.indent_level = self.indent_level.saturating_sub(1); + self.end_line(stream); } PreToken::Literal(value, kind) => { - match self.0 { - State::StartOfLine | State::MiddleOfLine => { - stream.push(PostToken::Literal(value)); + assert!(kind != SyntaxKind::Comment); + stream.push(PostToken::Literal(value)); + self.position = LinePosition::MiddleOfLine; + } + PreToken::Comment(value, kind) => { + match kind { + CommentKind::Inline => { + assert!(self.position == LinePosition::MiddleOfLine); + stream.trim_end(&PostToken::Space); + stream.push(PostToken::Space); + stream.push(PostToken::Space); + } + CommentKind::Preceding => { + self.end_line(stream); } } - - if kind == SyntaxKind::Comment { - self.newline(stream); - } else { - stream.push(PostToken::Space); - self.0 = State::MiddleOfLine; - } + stream.push(PostToken::Literal(value)); + self.position = LinePosition::MiddleOfLine; + self.end_line(stream); } } } - /// Adds a newline to the stream and modifies the state accordingly. - fn newline(&mut self, stream: &mut TokenStream) { - stream.trim_end(&PostToken::Space); - stream.push(PostToken::Newline); - self.0 = State::StartOfLine; + /// Trims any and all whitespace from the end of the stream. 
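The postprocessor above is, at heart, a small state machine over line position: writing a literal moves it to the middle of a line, and ending a line trims trailing spaces, emits at most one newline, and resets the state, so repeated calls are harmless. A reduced sketch under that reading, with a hypothetical three-variant token alphabet in place of `PostToken`:

#[derive(Debug, PartialEq)]
enum Tok {
    Space,
    Newline,
    Text(String),
}

#[derive(Default, PartialEq)]
enum Pos {
    #[default]
    StartOfLine,
    MiddleOfLine,
}

struct Machine {
    pos: Pos,
    out: Vec<Tok>,
}

impl Machine {
    fn text(&mut self, s: &str) {
        self.out.push(Tok::Text(s.to_string()));
        self.pos = Pos::MiddleOfLine;
    }

    fn end_line(&mut self) {
        // Drop trailing spaces before committing the line break.
        while matches!(self.out.last(), Some(Tok::Space)) {
            self.out.pop();
        }
        if self.pos != Pos::StartOfLine {
            self.out.push(Tok::Newline);
        }
        self.pos = Pos::StartOfLine;
    }
}

fn main() {
    let mut m = Machine { pos: Pos::default(), out: Vec::new() };
    m.text("version 1.2");
    m.out.push(Tok::Space);
    m.end_line();
    m.end_line(); // second call is a no-op
    assert_eq!(m.out, vec![Tok::Text("version 1.2".into()), Tok::Newline]);
}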
+ fn trim_whitespace(&mut self, stream: &mut TokenStream) { + stream.trim_while(|token| { + matches!( + token, + PostToken::Space | PostToken::Newline | PostToken::Indent + ) + }); + } + + /// Trims spaces and indents (and not newlines) from the end of the stream. + fn trim_last_line(&mut self, stream: &mut TokenStream) { + stream.trim_while(|token| matches!(token, PostToken::Space | PostToken::Indent)); + } + + /// Ends the current line. + /// + /// Removes any trailing spaces or indents and adds a newline only if state + /// is not [`LinePosition::StartOfLine`]. State is then set to + /// [`LinePosition::StartOfLine`]. Safe to call multiple times in a row. + fn end_line(&mut self, stream: &mut TokenStream) { + self.trim_last_line(stream); + if self.position != LinePosition::StartOfLine { + stream.push(PostToken::Newline); + } + self.position = LinePosition::StartOfLine; + self.indent(stream); + } + + /// Pushes the current indentation level to the stream. + /// This should only be called when the state is + /// [`LinePosition::StartOfLine`]. + fn indent(&self, stream: &mut TokenStream) { + assert!(self.position == LinePosition::StartOfLine); + for _ in 0..self.indent_level { + stream.push(PostToken::Indent); + } } } diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 851e93d3c..f5ebfef37 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -1,11 +1,22 @@ //! Tokens emitted during the formatting of particular elements. -use wdl_ast::SyntaxExt; use wdl_ast::SyntaxKind; +use wdl_ast::SyntaxTokenExt; use crate::Token; use crate::TokenStream; +/// The kind of comment. +#[derive(Debug, Eq, PartialEq)] +pub enum CommentKind { + /// A comment on it's own line, indented to the same level as the code + /// following it. + Preceding, + + /// A comment on the same line as the code preceding it. + Inline, +} + /// A token that can be written by elements. /// /// These are tokens that are intended to be written directly by elements to a @@ -16,11 +27,26 @@ use crate::TokenStream; /// expected to write [`PostToken`](super::PostToken)s directly). #[derive(Debug, Eq, PartialEq)] pub enum PreToken { - /// A section spacer. - SectionSpacer, + /// The end of a section. + BlankLine, + + /// The end of a line. + LineEnd, + + /// The end of a word. + WordEnd, + + /// The start of an indented block. + IndentStart, - /// Includes text literally in the output. + /// The end of an indented block. + IndentEnd, + + /// Literal text. Literal(String, SyntaxKind), + + /// A comment. + Comment(String, CommentKind), } impl PreToken { @@ -28,19 +54,28 @@ impl PreToken { /// [`PreToken::Literal`]. pub fn kind(&self) -> Option<&SyntaxKind> { match self { + PreToken::BlankLine => None, + PreToken::LineEnd => None, + PreToken::WordEnd => None, + PreToken::IndentStart => None, + PreToken::IndentEnd => None, PreToken::Literal(_, kind) => Some(kind), - _ => None, + PreToken::Comment(..) => None, } } } /// The line length to use when displaying pretokens. 
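`trim_whitespace` and `trim_last_line` above are thin wrappers over the stream's `trim_while`, which pops matching tokens off the back of the buffer. The helper itself is independent of the token type; a minimal sketch over a plain `Vec`:

/// Pops trailing elements of `buf` for as long as `predicate` accepts them.
fn trim_while<T>(buf: &mut Vec<T>, predicate: impl Fn(&T) -> bool) {
    while buf.last().map_or(false, |t| predicate(t)) {
        buf.pop();
    }
}

fn main() {
    let mut tokens = vec!["fn", " ", " ", "\n"];
    trim_while(&mut tokens, |t| t.trim().is_empty());
    assert_eq!(tokens, vec!["fn"]);
}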
-const DISPLAY_LINE_LENGTH: usize = 88; +const DISPLAY_LINE_LENGTH: usize = 90; impl std::fmt::Display for PreToken { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - PreToken::SectionSpacer => write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)), + PreToken::BlankLine => write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)), + PreToken::LineEnd => write!(f, ""), + PreToken::WordEnd => write!(f, ""), + PreToken::IndentStart => write!(f, ""), + PreToken::IndentEnd => write!(f, ""), PreToken::Literal(value, kind) => { write!( f, @@ -50,6 +85,19 @@ impl std::fmt::Display for PreToken { width = DISPLAY_LINE_LENGTH ) } + PreToken::Comment(value, kind) => { + let kind = match kind { + CommentKind::Preceding => "Preceding", + CommentKind::Inline => "Inline", + }; + write!( + f, + "{:width$}", + value, + kind, + width = DISPLAY_LINE_LENGTH + ) + } } } } @@ -57,9 +105,35 @@ impl std::fmt::Display for PreToken { impl Token for PreToken {} impl TokenStream { - /// Inserts an element spacer to the stream. - pub fn section_spacer(&mut self) { - self.0.push(PreToken::SectionSpacer); + /// Inserts a blank line token to the stream if the stream does not already + /// end with a blank line. Multiple blank lines are not allowed. + pub fn blank_line(&mut self) { + self.trim_end(&PreToken::BlankLine); + self.0.push(PreToken::BlankLine); + } + + /// Inserts an end of line token to the stream if the stream does not + /// already end with an end of line token. + pub fn end_line(&mut self) { + self.trim_end(&PreToken::LineEnd); + self.0.push(PreToken::LineEnd); + } + + /// Inserts a word end token to the stream if the stream does not already + /// end with a word end token. + pub fn end_word(&mut self) { + self.trim_end(&PreToken::WordEnd); + self.0.push(PreToken::WordEnd); + } + + /// Inserts an indent start token to the stream. + pub fn increment_indent(&mut self) { + self.0.push(PreToken::IndentStart); + } + + /// Inserts an indent end token to the stream. + pub fn decrement_indent(&mut self) { + self.0.push(PreToken::IndentEnd); } /// Pushes an AST token into the stream. 
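`blank_line`, `end_line`, and `end_word` above all share one shape: drop any trailing copy of the separator, then push exactly one. That makes the calls idempotent, so element writers can emit separators defensively without producing runs of them. A sketch of the pattern with a hypothetical marker token:

#[derive(Debug, PartialEq)]
enum Marker {
    WordEnd,
    Word(&'static str),
}

/// Inserts a word separator, collapsing any separators already at the end.
fn end_word(stream: &mut Vec<Marker>) {
    while stream.last() == Some(&Marker::WordEnd) {
        stream.pop();
    }
    stream.push(Marker::WordEnd);
}

fn main() {
    let mut stream = vec![Marker::Word("import")];
    end_word(&mut stream);
    end_word(&mut stream); // collapses into a single separator
    assert_eq!(stream, vec![Marker::Word("import"), Marker::WordEnd]);
}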
@@ -74,21 +148,22 @@ impl TokenStream { if !kind.is_trivia() { let preceding_trivia = syntax.preceding_trivia(); for token in preceding_trivia { - let trivia = match token.kind() { - SyntaxKind::Whitespace => { - PreToken::Literal(String::from("\n"), SyntaxKind::Whitespace) - } + match token.kind() { + SyntaxKind::Whitespace => self.blank_line(), SyntaxKind::Comment => { - PreToken::Literal(token.text().to_owned(), SyntaxKind::Comment) + let comment = PreToken::Comment( + token.text().trim_end().to_owned(), + CommentKind::Preceding, + ); + self.0.push(comment); } _ => unreachable!("unexpected trivia: {:?}", token), }; - self.0.push(trivia); } if let Some(token) = syntax.inline_comment() { - inline_comment = Some(PreToken::Literal( - token.text().to_owned(), - SyntaxKind::Comment, + inline_comment = Some(PreToken::Comment( + token.text().trim_end().to_owned(), + CommentKind::Inline, )); } } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index fcbac2143..1edd88ceb 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -22,19 +22,19 @@ pub fn format_ast(element: &FormatElement, stream: &mut TokenStream) { (&version).write(stream); } - stream.section_spacer(); + stream.blank_line(); if let Some(tasks) = children.remove(&SyntaxKind::TaskDefinitionNode) { for task in tasks { (&task).write(stream); - stream.section_spacer(); + stream.blank_line(); } } if let Some(workflows) = children.remove(&SyntaxKind::WorkflowDefinitionNode) { for workflow in workflows { (&workflow).write(stream); - stream.section_spacer(); + stream.blank_line(); } } @@ -52,11 +52,15 @@ pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStrea (&keyword).write(stream); } + stream.end_word(); + if let Some(mut versions) = children.remove(&SyntaxKind::Version) { let version = exactly_one!(versions, "versions"); (&version).write(stream); } + stream.end_line(); + if !children.is_empty() { todo!( "unhandled children for version statement: {:#?}", diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index 7450c1086..08fabbe0a 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -17,21 +17,33 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< (&keyword).write(stream); } + stream.end_word(); + if let Some(mut idents) = children.remove(&SyntaxKind::Ident) { - let idents = exactly_one!(idents, "idents"); - (&idents).write(stream); + let ident = exactly_one!(idents, "idents"); + (&ident).write(stream); } + stream.end_word(); + if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) { let brace = exactly_one!(braces, "open braces"); (&brace).write(stream); } + stream.end_line(); + stream.increment_indent(); + + // TODO: Implement task body formatting. 
+ stream.decrement_indent(); + if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) { let brace = exactly_one!(braces, "closed braces"); (&brace).write(stream); } + stream.end_line(); + if !children.is_empty() { todo!( "unhandled children for task definition: {:#?}", diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index 488d2ac03..9f3635412 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -19,25 +19,36 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr (&keyword).write(stream); } + stream.end_word(); + if let Some(mut idents) = children.remove(&SyntaxKind::Ident) { let idents = exactly_one!(idents, "idents"); (&idents).write(stream); } + stream.end_word(); + if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) { let brace = exactly_one!(braces, "open braces"); (&brace).write(stream); } + stream.end_line(); + stream.increment_indent(); + if let Some(calls) = children.remove(&SyntaxKind::CallStatementNode) { for call in calls { (&call).write(stream); + stream.end_line(); } } + stream.decrement_indent(); + if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) { let brace = exactly_one!(braces, "closed braces"); (&brace).write(stream); + stream.end_line(); } if !children.is_empty() { diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index 6bdb92ae2..8526268c2 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -17,21 +17,29 @@ pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream
( - source: &T, +/// Whitespace is only considered substantial if it contains more than one +/// newline and is between comments. Comments are always considered substantial. +fn gather_substantial_trivia( + source: &SyntaxToken, direction: Direction, break_on_newline: bool, ) -> Box<[SyntaxToken]> { - let iter = source.siblings_with_tokens(direction); - /// Adds the token to the currently collecting buffer in the right place /// depending in the direction we are traversing. fn push_results( @@ -742,71 +739,83 @@ fn gather_substantial_trivia( } } - let trivia = iter - .skip_while(|e| source.matches(e)) - .take_while(|e| e.kind().is_trivia()) - .fold_while(VecDeque::new(), |mut results, e| { - match e.kind() { - SyntaxKind::Comment => { - // Check if e is a comment on its own line. - // If direction is 'Next' then we already know that the - // comment is on its own line. - if direction == Direction::Prev { - if let Some(prev) = e.prev_sibling_or_token() { - if prev.kind() == SyntaxKind::Whitespace { - let newlines = prev - .clone() - .into_token() - .expect("whitespace should always be a token") - .text() - .chars() - .filter(|c| *c == '\n') - .count(); - - // If there are newlines in 'prev' then we know - // that the comment is on its own line. - // The comment may still be on its own line if - // 'prev' does not have newlines and nothing comes - // before 'prev'. - if newlines == 0 && prev.prev_sibling_or_token().is_some() { - return FoldWhile::Done(results); - } - } else { - // There is something else on this line before the comment. - return FoldWhile::Done(results); + let mut results = VecDeque::new(); + let mut cur = match direction { + Direction::Next => source.next_token(), + Direction::Prev => source.prev_token(), + }; + while let Some(t) = cur { + if !t.kind().is_trivia() { + break; + } + + match t.kind() { + SyntaxKind::Comment => { + // Check if t is a comment on its own line. + // If direction is 'Next' then we already know that the + // comment is on its own line. + if direction == Direction::Prev { + if let Some(prev) = t.prev_token() { + if prev.kind() == SyntaxKind::Whitespace { + let newlines = prev.text().chars().filter(|c| *c == '\n').count(); + + // If there are newlines in 'prev' then we know + // that the comment is on its own line. + // The comment may still be on its own line if + // 'prev' does not have newlines and nothing comes + // before 'prev'. + if newlines == 0 && prev.prev_token().is_some() { + break; } + } else { + // There is something else on this line before the comment. + break; } } - - let comment = e.into_token().expect("comment should always be a token"); - - push_results(comment, &mut results, &direction); } - SyntaxKind::Whitespace => { - let token = e.into_token().expect("whitespace should always be a token"); - let newlines = token.text().chars().filter(|c| *c == '\n').count(); + push_results(t.clone(), &mut results, &direction); + } + SyntaxKind::Whitespace => { + let newlines = t.text().chars().filter(|c| *c == '\n').count(); - if break_on_newline && newlines > 0 { - return FoldWhile::Done(results); - } + if break_on_newline && newlines > 0 { + break; + } - if newlines > 1 { - push_results(token, &mut results, &direction) - } + if newlines > 1 { + push_results(t.clone(), &mut results, &direction); } - // SAFETY: we just filtered out any non-comment and - // non-whitespace nodes above, so this should never occur. 
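The rework above trades the `fold_while` combinator for a plain cursor loop: step token-by-token in the requested direction, stop at the first non-trivia token (or at the first newline when hunting for an inline comment), and preserve source order by pushing onto the opposite end of a `VecDeque` when walking backwards. The same shape over a plain slice of strings, with an index playing the cursor:

use std::collections::VecDeque;

#[derive(Clone, Copy)]
enum Direction {
    Next,
    Prev,
}

/// Collects adjacent trivia on one side of `start`, in source order.
fn collect_adjacent<'a>(
    items: &[&'a str],
    start: usize,
    direction: Direction,
    is_trivia: impl Fn(&str) -> bool,
) -> VecDeque<&'a str> {
    let mut results = VecDeque::new();
    let mut cur = match direction {
        Direction::Next => start.checked_add(1),
        Direction::Prev => start.checked_sub(1),
    };
    while let Some(i) = cur {
        let Some(item) = items.get(i) else { break };
        if !is_trivia(item) {
            break; // first non-trivia neighbor ends the walk
        }
        match direction {
            Direction::Next => results.push_back(*item),
            Direction::Prev => results.push_front(*item), // keep source order
        }
        cur = match direction {
            Direction::Next => i.checked_add(1),
            Direction::Prev => i.checked_sub(1),
        };
    }
    results
}

fn main() {
    let items = ["# one", "# two", "task", "# three"];
    let before = collect_adjacent(&items, 2, Direction::Prev, |s| s.starts_with('#'));
    assert_eq!(Vec::from(before), vec!["# one", "# two"]);
}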
- _ => unreachable!(), } + // SAFETY: we just filtered out any non-comment and + // non-whitespace nodes above, so this should never occur. + _ => unreachable!(), + } + cur = match direction { + Direction::Next => t.next_token(), + Direction::Prev => t.prev_token(), + }; + } - FoldWhile::Continue(results) - }) - .into_inner(); + // // Remove leading and trailing whitespace from results. + // while let Some(t) = results.front() { + // if t.kind() == SyntaxKind::Whitespace { + // results.pop_front(); + // } else { + // break; + // } + // } + // while let Some(t) = results.back() { + // if t.kind() == SyntaxKind::Whitespace { + // results.pop_back(); + // } else { + // break; + // } + // } // NOTE: most of the time, this conversion will be O(1). Occassionally // it will be O(n). No allocations will ever be done. Thus, the // ammortized cost of this is quite cheap. - Vec::from(trivia).into_boxed_slice() + Vec::from(results).into_boxed_slice() } /// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and @@ -873,34 +882,6 @@ pub trait SyntaxExt { // allocations. results.into_boxed_slice() } - - /// Gets all of the substantial preceding trivia for an element. - fn preceding_trivia(&self) -> Box<[SyntaxToken]> - where - Self: Sized, - { - gather_substantial_trivia(self, Direction::Prev, false) - } - - /// Gets all of the substantial succeeding trivia for an element. - fn succeeding_trivia(&self) -> Box<[SyntaxToken]> - where - Self: Sized, - { - gather_substantial_trivia(self, Direction::Next, false) - } - - /// Get any inline comment directly following an element on the - /// same line. - fn inline_comment(&self) -> Option - where - Self: Sized, - { - gather_substantial_trivia(self, Direction::Next, true) - // NOTE: at most, there can be one contiguous comment on a line. - .first() - .cloned() - } } impl SyntaxExt for SyntaxNode { @@ -945,6 +926,57 @@ impl SyntaxExt for SyntaxElement { } } +/// An extension trait for [`SyntaxToken`]s. +pub trait SyntaxTokenExt { + /// Gets all of the substantial preceding trivia for an element. + fn preceding_trivia(&self) -> Box<[SyntaxToken]> + where + Self: Sized, + Self: SyntaxExt; + + /// Gets all of the substantial succeeding trivia for an element. + fn succeeding_trivia(&self) -> Box<[SyntaxToken]> + where + Self: Sized, + Self: SyntaxExt; + + /// Get any inline comment directly following an element on the + /// same line. + fn inline_comment(&self) -> Option + where + Self: Sized, + Self: SyntaxExt; +} + +impl SyntaxTokenExt for SyntaxToken { + fn preceding_trivia(&self) -> Box<[SyntaxToken]> + where + Self: Sized, + Self: SyntaxExt, + { + gather_substantial_trivia(self, Direction::Prev, false) + } + + fn succeeding_trivia(&self) -> Box<[SyntaxToken]> + where + Self: Sized, + Self: SyntaxExt, + { + gather_substantial_trivia(self, Direction::Next, false) + } + + fn inline_comment(&self) -> Option + where + Self: Sized, + Self: SyntaxExt, + { + gather_substantial_trivia(self, Direction::Next, true) + // NOTE: at most, there can be one contiguous comment on a line. + .first() + .cloned() + } +} + #[cfg(test)] mod tests { use super::*; @@ -975,7 +1007,7 @@ workflow foo {} # This should not be collected. 
let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - let trivia = workflow.preceding_trivia(); + let trivia = workflow.first_token().unwrap().preceding_trivia(); let mut trivia_iter = trivia.iter(); assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); assert_eq!(trivia_iter.next().unwrap().text(), "# Some"); @@ -985,7 +1017,10 @@ workflow foo {} # This should not be collected. assert_eq!(trivia_iter.next().unwrap().text(), "\n \n"); assert_eq!(trivia_iter.next().unwrap().text(), "# Others are short"); assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); - assert_eq!(trivia_iter.next().unwrap().text(), "# and, yet another"); + assert_eq!( + trivia_iter.next().unwrap().text(), + "# and, yet another" + ); assert!(trivia_iter.next().is_none()); } @@ -1007,11 +1042,17 @@ workflow foo {} # Here is a comment that should be collected. let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - let trivia = workflow.succeeding_trivia(); + let trivia = workflow.last_token().unwrap().succeeding_trivia(); let mut trivia_iter = trivia.iter(); - assert_eq!(trivia_iter.next().unwrap().text(), "# Here is a comment that should be collected."); + assert_eq!( + trivia_iter.next().unwrap().text(), + "# Here is a comment that should be collected." + ); assert_eq!(trivia_iter.next().unwrap().text(), "\n\n"); - assert_eq!(trivia_iter.next().unwrap().text(), "# This comment should be included too."); + assert_eq!( + trivia_iter.next().unwrap().text(), + "# This comment should be included too." + ); assert!(trivia_iter.next().is_none()); } @@ -1033,7 +1074,10 @@ workflow foo {} # Here is a comment that should be collected. let workflow = tree.root().last_child().unwrap(); assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); - let comment = workflow.inline_comment().unwrap(); - assert_eq!(comment.text(), "# Here is a comment that should be collected."); + let comment = workflow.last_token().unwrap().inline_comment().unwrap(); + assert_eq!( + comment.text(), + "# Here is a comment that should be collected." + ); } } From c8ac499006294f907360308970c250c00fad1cd7 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 7 Oct 2024 11:35:15 -0400 Subject: [PATCH 11/60] [WIP] --- wdl-format/src/element.rs | 25 +++---- wdl-format/src/lib.rs | 4 +- wdl-format/src/token.rs | 32 +++++++++ wdl-format/src/token/post.rs | 65 ++++++++++++------- wdl-format/src/token/pre.rs | 122 ++++++++++++++++++++--------------- wdl-format/src/v1.rs | 9 +++ wdl-format/src/v1/import.rs | 69 ++++++++++++++++++++ 7 files changed, 231 insertions(+), 95 deletions(-) create mode 100644 wdl-format/src/v1/import.rs diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index e3449ddb4..695762a5d 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -86,24 +86,15 @@ impl AstElementFormatExt for Element { /// Collates the children of a particular node. 
fn collate(node: &Node) -> Option>> { let mut results = Vec::new(); - let mut stream = node - .syntax() - .children_with_tokens() - .filter_map(|syntax| { - if syntax.kind().is_trivia() { - None - } else { - Some(Element::cast(syntax)) - } - }) - .peekable(); - - while stream.peek().is_some() { - let element = match stream.next() { - Some(node) => node, - None => break, - }; + let stream = node.syntax().children_with_tokens().filter_map(|syntax| { + if syntax.kind().is_trivia() { + None + } else { + Some(Element::cast(syntax)) + } + }); + for element in stream { let children = match element { Element::Node(ref node) => collate(node), Element::Token(_) => None, diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index 255641a74..a6374f1f6 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -71,8 +71,8 @@ impl Writable for &FormatElement { AstNode::GreaterEqualExpr(_) => todo!(), AstNode::GreaterExpr(_) => todo!(), AstNode::IfExpr(_) => todo!(), - AstNode::ImportAlias(_) => todo!(), - AstNode::ImportStatement(_) => todo!(), + AstNode::ImportAlias(_) => v1::import::format_import_alias(self, stream), + AstNode::ImportStatement(_) => v1::import::format_import_statement(self, stream), AstNode::IndexExpr(_) => todo!(), AstNode::InequalityExpr(_) => todo!(), AstNode::InputSection(_) => todo!(), diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs index 82d17003c..d23eedbcd 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -67,3 +67,35 @@ impl IntoIterator for TokenStream { self.0.into_iter() } } + +/// The kind of comment. +#[derive(Debug, Eq, PartialEq)] +pub enum Comment { + /// A comment on it's own line, indented to the same level as the code + /// following it. + Preceding(String), + + /// A comment on the same line as the code preceding it. + Inline(String), +} + +/// Trivia. +#[derive(Debug, Eq, PartialEq)] +pub enum Trivia { + /// A blank line. This may be ignored by the postprocessor. + BlankLine, + /// A comment. + Comment(Comment), +} + +/// Whether blank lines are allowed in the current context. +#[derive(Eq, PartialEq, Default, Debug, Clone, Copy)] +pub enum BlankLinesAllowed { + /// Blank lines are allowed between comments. + #[default] + BetweenComments, + /// Blank lines are always allowed. + Yes, + /// Blank lines are not allowed. + No, +} diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index c67f2e900..fd3bfa518 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -5,19 +5,16 @@ use wdl_ast::SyntaxKind; -use crate::CommentKind; +use crate::Comment; use crate::NEWLINE; use crate::PreToken; use crate::SPACE; use crate::Token; use crate::TokenStream; +use crate::Trivia; +use crate::BlankLinesAllowed; /// A postprocessed token. -/// -/// Note that this will transformed into a [`TokenStream`](super::TokenStream) -/// of [`PostToken`](super::PostToken)s by a -/// [`Postprocessor`](super::Postprocessor) (authors of elements are never -/// expected to write [`PostToken`](super::PostToken)s directly). #[derive(Eq, PartialEq)] pub enum PostToken { /// A space. @@ -76,6 +73,9 @@ pub struct Postprocessor { /// The current indentation level. indent_level: usize, + + /// Whether blank lines are allowed in the current context. 
+ blank_lines_allowed: BlankLinesAllowed, } impl Postprocessor { @@ -83,22 +83,29 @@ impl Postprocessor { pub fn run(&mut self, input: TokenStream) -> TokenStream { let mut output = TokenStream::::default(); - for token in input { - self.step(token, &mut output) + let mut stream = input.iter().peekable(); + while let Some(token) = stream.next() { + self.step(token, stream.peek().cloned(), &mut output) } self.trim_whitespace(&mut output); output.push(PostToken::Newline); - output.push(PostToken::Newline); output } /// Takes a step of a [`PreToken`] stream and processes the appropriate /// [`PostToken`]s. - pub fn step(&mut self, token: PreToken, stream: &mut TokenStream) { + pub fn step( + &mut self, + token: &PreToken, + _next: Option<&PreToken>, + stream: &mut TokenStream, + ) { + dbg!(token); match token { PreToken::BlankLine => { + assert!(self.blank_lines_allowed != BlankLinesAllowed::No); self.trim_whitespace(stream); stream.push(PostToken::Newline); stream.push(PostToken::Newline); @@ -124,27 +131,39 @@ impl Postprocessor { self.indent_level = self.indent_level.saturating_sub(1); self.end_line(stream); } + PreToken::BlankLinesContext(context) => { + self.blank_lines_allowed = *context; + } PreToken::Literal(value, kind) => { - assert!(kind != SyntaxKind::Comment); - stream.push(PostToken::Literal(value)); + assert!(*kind != SyntaxKind::Comment); + stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; } - PreToken::Comment(value, kind) => { - match kind { - CommentKind::Inline => { + PreToken::Trivia(trivia) => match trivia { + Trivia::BlankLine => { + if self.blank_lines_allowed == BlankLinesAllowed::Yes { + self.trim_whitespace(stream); + stream.push(PostToken::Newline); + stream.push(PostToken::Newline); + } + } + Trivia::Comment(comment) => match comment { + Comment::Preceding(value) => { + self.end_line(stream); + stream.push(PostToken::Literal(value.to_owned())); + self.position = LinePosition::MiddleOfLine; + self.end_line(stream); + } + Comment::Inline(value) => { assert!(self.position == LinePosition::MiddleOfLine); - stream.trim_end(&PostToken::Space); + self.trim_last_line(stream); stream.push(PostToken::Space); stream.push(PostToken::Space); - } - CommentKind::Preceding => { + stream.push(PostToken::Literal(value.to_owned())); self.end_line(stream); } - } - stream.push(PostToken::Literal(value)); - self.position = LinePosition::MiddleOfLine; - self.end_line(stream); - } + }, + }, } } diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index f5ebfef37..7d741674f 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -3,19 +3,11 @@ use wdl_ast::SyntaxKind; use wdl_ast::SyntaxTokenExt; +use crate::Comment; use crate::Token; use crate::TokenStream; - -/// The kind of comment. -#[derive(Debug, Eq, PartialEq)] -pub enum CommentKind { - /// A comment on it's own line, indented to the same level as the code - /// following it. - Preceding, - - /// A comment on the same line as the code preceding it. - Inline, -} +use crate::Trivia; +use crate::BlankLinesAllowed; /// A token that can be written by elements. /// @@ -27,7 +19,7 @@ pub enum CommentKind { /// expected to write [`PostToken`](super::PostToken)s directly). #[derive(Debug, Eq, PartialEq)] pub enum PreToken { - /// The end of a section. + /// A blank line. BlankLine, /// The end of a line. @@ -42,27 +34,14 @@ pub enum PreToken { /// The end of an indented block. IndentEnd, + /// The context for blank lines. 
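Blank lines from the source arrive at the postprocessor as ordinary trivia, and the policy field above decides whether they survive; comments are never dropped. A sketch of that gate, with names that paraphrase rather than copy the crate's types:

#[derive(Clone, Copy, PartialEq)]
enum Policy {
    AlwaysKeepBlanks,
    DropBlanks,
}

enum Trivia {
    BlankLine,
    Comment(String),
}

/// Emits trivia according to the active blank-line policy.
fn emit(trivia: Trivia, policy: Policy, out: &mut String) {
    match trivia {
        Trivia::BlankLine => {
            if policy == Policy::AlwaysKeepBlanks {
                out.push('\n'); // preserve the author's vertical spacing
            }
        }
        Trivia::Comment(text) => {
            out.push_str(&text);
            out.push('\n');
        }
    }
}

fn main() {
    let mut out = String::new();
    emit(Trivia::BlankLine, Policy::DropBlanks, &mut out);
    emit(Trivia::Comment("# kept either way".into()), Policy::DropBlanks, &mut out);
    assert_eq!(out, "# kept either way\n");
}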
+ BlankLinesContext(BlankLinesAllowed), + /// Literal text. Literal(String, SyntaxKind), - /// A comment. - Comment(String, CommentKind), -} - -impl PreToken { - /// Gets the [`SyntaxKind`] of the token if the token is a - /// [`PreToken::Literal`]. - pub fn kind(&self) -> Option<&SyntaxKind> { - match self { - PreToken::BlankLine => None, - PreToken::LineEnd => None, - PreToken::WordEnd => None, - PreToken::IndentStart => None, - PreToken::IndentEnd => None, - PreToken::Literal(_, kind) => Some(kind), - PreToken::Comment(..) => None, - } - } + /// Trivia. + Trivia(Trivia), } /// The line length to use when displaying pretokens. @@ -76,6 +55,9 @@ impl std::fmt::Display for PreToken { PreToken::WordEnd => write!(f, ""), PreToken::IndentStart => write!(f, ""), PreToken::IndentEnd => write!(f, ""), + PreToken::BlankLinesContext(context) => { + write!(f, "", context) + } PreToken::Literal(value, kind) => { write!( f, @@ -85,19 +67,29 @@ impl std::fmt::Display for PreToken { width = DISPLAY_LINE_LENGTH ) } - PreToken::Comment(value, kind) => { - let kind = match kind { - CommentKind::Preceding => "Preceding", - CommentKind::Inline => "Inline", - }; - write!( - f, - "{:width$}", - value, - kind, - width = DISPLAY_LINE_LENGTH - ) - } + PreToken::Trivia(trivia) => match trivia { + Trivia::BlankLine => { + write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)) + } + Trivia::Comment(comment) => match comment { + Comment::Preceding(value) => { + write!( + f, + "{:width$}", + value, + width = DISPLAY_LINE_LENGTH + ) + } + Comment::Inline(value) => { + write!( + f, + "{:width$}", + value, + width = DISPLAY_LINE_LENGTH + ) + } + }, + }, } } } @@ -106,16 +98,19 @@ impl Token for PreToken {} impl TokenStream { /// Inserts a blank line token to the stream if the stream does not already - /// end with a blank line. Multiple blank lines are not allowed. + /// end with a blank line. This will replace any [`Trivia::BlankLine`] + /// tokens with [`PreToken::BlankLine`]. pub fn blank_line(&mut self) { - self.trim_end(&PreToken::BlankLine); + self.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine))); self.0.push(PreToken::BlankLine); } /// Inserts an end of line token to the stream if the stream does not /// already end with an end of line token. + /// + /// This will also trim any trailing [`PreToken::WordEnd`] tokens. pub fn end_line(&mut self) { - self.trim_end(&PreToken::LineEnd); + self.trim_while(|t| matches!(t, PreToken::WordEnd | PreToken::LineEnd)); self.0.push(PreToken::LineEnd); } @@ -136,6 +131,21 @@ impl TokenStream { self.0.push(PreToken::IndentEnd); } + /// Inserts a blank lines allowed context change. + pub fn blank_lines_allowed(&mut self) { + self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::Yes)); + } + + /// Inserts a blank lines disallowed context change. + pub fn blank_lines_disallowed(&mut self) { + self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::No)); + } + + /// Inserts a blank lines allowed between comments context change. + pub fn blank_lines_allowed_between_comments(&mut self) { + self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::BetweenComments)); + } + /// Pushes an AST token into the stream. /// /// This will also push any preceding or inline trivia into the stream. 
@@ -149,23 +159,29 @@ impl TokenStream { let preceding_trivia = syntax.preceding_trivia(); for token in preceding_trivia { match token.kind() { - SyntaxKind::Whitespace => self.blank_line(), + SyntaxKind::Whitespace => { + if !self.0.last().map_or(false, |t| { + matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)) + }) { + self.0.push(PreToken::Trivia(Trivia::BlankLine)); + } + } SyntaxKind::Comment => { - let comment = PreToken::Comment( + let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding( token.text().trim_end().to_owned(), - CommentKind::Preceding, - ); + ))); self.0.push(comment); } _ => unreachable!("unexpected trivia: {:?}", token), }; } if let Some(token) = syntax.inline_comment() { - inline_comment = Some(PreToken::Comment( + inline_comment = Some(PreToken::Trivia(Trivia::Comment(Comment::Inline( token.text().trim_end().to_owned(), - CommentKind::Inline, - )); + )))); } + } else { + unreachable!("unexpected trivia: {:?}", syntax); } let token = PreToken::Literal(syntax.text().to_owned(), kind); self.0.push(token); diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 1edd88ceb..df5533ad7 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -2,6 +2,7 @@ use wdl_ast::SyntaxKind; +pub mod import; pub mod task; pub mod workflow; @@ -24,6 +25,14 @@ pub fn format_ast(element: &FormatElement, stream: &mut TokenStream) { stream.blank_line(); + if let Some(imports) = children.remove(&SyntaxKind::ImportStatementNode) { + for import in imports { + (&import).write(stream); + } + } + + stream.blank_line(); + if let Some(tasks) = children.remove(&SyntaxKind::TaskDefinitionNode) { for task in tasks { (&task).write(stream); diff --git a/wdl-format/src/v1/import.rs b/wdl-format/src/v1/import.rs new file mode 100644 index 000000000..fe3efa43b --- /dev/null +++ b/wdl-format/src/v1/import.rs @@ -0,0 +1,69 @@ +//! Formatting for imports. + +use wdl_ast::SyntaxKind; + +use crate::PreToken; +use crate::TokenStream; +use crate::Writable as _; +use crate::element::FormatElement; +use crate::exactly_one; + +/// Formats an [`ImportAlias`](wdl_ast::v1::ImportAlias). +pub fn format_import_alias(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("import alias children"); + + let alias_keyword = children.next().expect("alias keyword"); + (&alias_keyword).write(stream); + + stream.end_word(); + + let real_name = children.next().expect("ident"); + (&real_name).write(stream); + + stream.end_word(); + + let as_keyword = children.next().expect("`as` keyword"); + (&as_keyword).write(stream); + + stream.end_word(); + + let alias_name = children.next().expect("ident"); + (&alias_name).write(stream); + + stream.end_word(); + + if children.next().is_some() { + todo!("unhandled children for import alias"); + } +} + +/// Formats an [`ImportStatement`](wdl_ast::v1::ImportStatement). 
+pub fn format_import_statement(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children_by_kind(); + + if let Some(mut import_keywords) = children.remove(&SyntaxKind::ImportKeyword) { + let import_keyword = exactly_one!(import_keywords, "import keywords"); + (&import_keyword).write(stream); + } + + stream.end_word(); + + if let Some(mut string_literals) = children.remove(&SyntaxKind::LiteralStringNode) { + let string_literal = exactly_one!(string_literals, "string literals"); + (&string_literal).write(stream); + } + + stream.end_word(); + + if let Some(import_aliases) = children.remove(&SyntaxKind::ImportAliasNode) { + for import_alias in import_aliases { + (&import_alias).write(stream); + } + } + + stream.end_line(); + + if !children.is_empty() { + todo!("unhandled children for import: {:#?}", children.keys()); + } +} From 63a4eb2406bc14d0e6ae73b4633e69eddbf1f63c Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 7 Oct 2024 17:08:41 -0400 Subject: [PATCH 12/60] [WIP] --- wdl-format/src/lib.rs | 2 +- wdl-format/src/token.rs | 8 +- wdl-format/src/token/post.rs | 22 ++-- wdl-format/src/token/pre.rs | 105 ++++++++++-------- wdl-format/src/v1.rs | 36 +++--- wdl-format/src/v1/import.rs | 57 +--------- wdl-format/src/v1/workflow.rs | 9 +- .../source.formatted.wdl | 26 +++++ .../source.formatted.wdl | 15 +++ .../source.formatted.wdl | 8 ++ .../source.formatted.wdl | 26 +++++ 11 files changed, 175 insertions(+), 139 deletions(-) create mode 100644 wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl create mode 100644 wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl create mode 100644 wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl create mode 100644 wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index a6374f1f6..5ff6973cc 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -95,7 +95,7 @@ impl Writable for &FormatElement { AstNode::LiteralOutput(_) => todo!(), AstNode::LiteralOutputItem(_) => todo!(), AstNode::LiteralPair(_) => todo!(), - AstNode::LiteralString(_) => todo!(), + AstNode::LiteralString(_) => v1::format_literal_string(self, stream), AstNode::LiteralStruct(_) => todo!(), AstNode::LiteralStructItem(_) => todo!(), AstNode::LogicalAndExpr(_) => todo!(), diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs index d23eedbcd..68b7ebf00 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -88,14 +88,12 @@ pub enum Trivia { Comment(Comment), } -/// Whether blank lines are allowed in the current context. +/// Whether optional blank lines are allowed in the current context. #[derive(Eq, PartialEq, Default, Debug, Clone, Copy)] -pub enum BlankLinesAllowed { +pub enum LineSpacingPolicy { /// Blank lines are allowed between comments. - #[default] BetweenComments, /// Blank lines are always allowed. + #[default] Yes, - /// Blank lines are not allowed. - No, } diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index fd3bfa518..81cc86d35 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -6,13 +6,13 @@ use wdl_ast::SyntaxKind; use crate::Comment; +use crate::LineSpacingPolicy; use crate::NEWLINE; use crate::PreToken; use crate::SPACE; use crate::Token; use crate::TokenStream; use crate::Trivia; -use crate::BlankLinesAllowed; /// A postprocessed token. 
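Each child of the import statement above is written followed by `end_word`, and rendering later collapses those markers into single spaces while letting none survive at the end of the line. A sketch of that join:

enum Piece {
    Word(&'static str),
    WordEnd,
}

/// Joins words with single spaces; trailing separators disappear.
fn render_line(pieces: &[Piece]) -> String {
    let mut out = String::new();
    for piece in pieces {
        match piece {
            Piece::Word(w) => out.push_str(w),
            Piece::WordEnd => {
                if !out.is_empty() && !out.ends_with(' ') {
                    out.push(' '); // at most one space between words
                }
            }
        }
    }
    out.trim_end().to_string()
}

fn main() {
    use Piece::*;
    let pieces = [
        Word("import"), WordEnd, WordEnd,
        Word("\"fileB.wdl\""), WordEnd,
        Word("as"), WordEnd,
        Word("foo"), WordEnd,
    ];
    assert_eq!(render_line(&pieces), "import \"fileB.wdl\" as foo");
}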
#[derive(Eq, PartialEq)] @@ -75,7 +75,7 @@ pub struct Postprocessor { indent_level: usize, /// Whether blank lines are allowed in the current context. - blank_lines_allowed: BlankLinesAllowed, + blank_lines_allowed: LineSpacingPolicy, } impl Postprocessor { @@ -83,9 +83,9 @@ impl Postprocessor { pub fn run(&mut self, input: TokenStream) -> TokenStream { let mut output = TokenStream::::default(); - let mut stream = input.iter().peekable(); + let mut stream = input.into_iter().peekable(); while let Some(token) = stream.next() { - self.step(token, stream.peek().cloned(), &mut output) + self.step(token, stream.peek(), &mut output) } self.trim_whitespace(&mut output); @@ -98,14 +98,12 @@ impl Postprocessor { /// [`PostToken`]s. pub fn step( &mut self, - token: &PreToken, + token: PreToken, _next: Option<&PreToken>, stream: &mut TokenStream, ) { - dbg!(token); match token { PreToken::BlankLine => { - assert!(self.blank_lines_allowed != BlankLinesAllowed::No); self.trim_whitespace(stream); stream.push(PostToken::Newline); stream.push(PostToken::Newline); @@ -131,20 +129,22 @@ impl Postprocessor { self.indent_level = self.indent_level.saturating_sub(1); self.end_line(stream); } - PreToken::BlankLinesContext(context) => { - self.blank_lines_allowed = *context; + PreToken::BlankLinesContext(policy) => { + self.blank_lines_allowed = policy; } PreToken::Literal(value, kind) => { - assert!(*kind != SyntaxKind::Comment); + assert!(kind != SyntaxKind::Comment); stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => { - if self.blank_lines_allowed == BlankLinesAllowed::Yes { + if self.blank_lines_allowed == LineSpacingPolicy::Yes { self.trim_whitespace(stream); stream.push(PostToken::Newline); stream.push(PostToken::Newline); + } else { + todo!("handle line spacing policy") } } Trivia::Comment(comment) => match comment { diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 7d741674f..0fa1d422c 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -4,10 +4,10 @@ use wdl_ast::SyntaxKind; use wdl_ast::SyntaxTokenExt; use crate::Comment; +use crate::LineSpacingPolicy; use crate::Token; use crate::TokenStream; use crate::Trivia; -use crate::BlankLinesAllowed; /// A token that can be written by elements. /// @@ -35,7 +35,7 @@ pub enum PreToken { IndentEnd, /// The context for blank lines. - BlankLinesContext(BlankLinesAllowed), + BlankLinesContext(LineSpacingPolicy), /// Literal text. Literal(String, SyntaxKind), @@ -133,17 +133,50 @@ impl TokenStream { /// Inserts a blank lines allowed context change. pub fn blank_lines_allowed(&mut self) { - self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::Yes)); - } - - /// Inserts a blank lines disallowed context change. - pub fn blank_lines_disallowed(&mut self) { - self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::No)); + self.0 + .push(PreToken::BlankLinesContext(LineSpacingPolicy::Yes)); } /// Inserts a blank lines allowed between comments context change. pub fn blank_lines_allowed_between_comments(&mut self) { - self.0.push(PreToken::BlankLinesContext(BlankLinesAllowed::BetweenComments)); + self.0.push(PreToken::BlankLinesContext( + LineSpacingPolicy::BetweenComments, + )); + } + + /// Inserts any preceding trivia into the stream. 
+ fn push_preceding_trivia(&mut self, token: &wdl_ast::Token) { + assert!(!token.syntax().kind().is_trivia()); + let preceding_trivia = token.syntax().preceding_trivia(); + for token in preceding_trivia { + match token.kind() { + SyntaxKind::Whitespace => { + if !self.0.last().map_or(false, |t| { + matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)) + }) { + self.0.push(PreToken::Trivia(Trivia::BlankLine)); + } + } + SyntaxKind::Comment => { + let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding( + token.text().trim_end().to_owned(), + ))); + self.0.push(comment); + } + _ => unreachable!("unexpected trivia: {:?}", token), + }; + } + } + + /// Inserts any inline trivia into the stream. + fn push_inline_trivia(&mut self, token: &wdl_ast::Token) { + assert!(!token.syntax().kind().is_trivia()); + if let Some(token) = token.syntax().inline_comment() { + let inline_comment = PreToken::Trivia(Trivia::Comment(Comment::Inline( + token.text().trim_end().to_owned(), + ))); + self.0.push(inline_comment); + } } /// Pushes an AST token into the stream. @@ -152,47 +185,21 @@ impl TokenStream { /// Any token may have preceding or inline trivia, unless that token is /// itself trivia (i.e. trivia cannot have trivia). pub fn push_ast_token(&mut self, token: &wdl_ast::Token) { - let syntax = token.syntax(); - let kind = syntax.kind(); - let mut inline_comment = None; - if !kind.is_trivia() { - let preceding_trivia = syntax.preceding_trivia(); - for token in preceding_trivia { - match token.kind() { - SyntaxKind::Whitespace => { - if !self.0.last().map_or(false, |t| { - matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)) - }) { - self.0.push(PreToken::Trivia(Trivia::BlankLine)); - } - } - SyntaxKind::Comment => { - let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding( - token.text().trim_end().to_owned(), - ))); - self.0.push(comment); - } - _ => unreachable!("unexpected trivia: {:?}", token), - }; - } - if let Some(token) = syntax.inline_comment() { - inline_comment = Some(PreToken::Trivia(Trivia::Comment(Comment::Inline( - token.text().trim_end().to_owned(), - )))); - } - } else { - unreachable!("unexpected trivia: {:?}", syntax); - } - let token = PreToken::Literal(syntax.text().to_owned(), kind); - self.0.push(token); - - if let Some(inline_comment) = inline_comment { - self.0.push(inline_comment); - } + self.push_preceding_trivia(token); + self.0.push(PreToken::Literal( + token.syntax().text().to_owned(), + token.syntax().kind(), + )); + self.push_inline_trivia(token); } - /// Gets an iterator of references to each token in the stream. - pub fn iter(&self) -> impl Iterator { - self.0.iter() + /// Pushes a literal string into the stream in place of an AST token. + /// This will insert any trivia that would have been inserted with the AST + /// token. + pub fn push_literal_in_place_of_token(&mut self, token: &wdl_ast::Token, replacement: String) { + self.push_preceding_trivia(token); + self.0 + .push(PreToken::Literal(replacement, token.syntax().kind())); + self.push_inline_trivia(token); } } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index df5533ad7..cc23be186 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -54,26 +54,26 @@ pub fn format_ast(element: &FormatElement, stream: &mut TokenStream) { /// Formats a [`VersionStatement`](wdl_ast::VersionStatement). 
pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - if let Some(mut keywords) = children.remove(&SyntaxKind::VersionKeyword) { - let keyword = exactly_one!(keywords, "`version` keywords"); - (&keyword).write(stream); - } - - stream.end_word(); - - if let Some(mut versions) = children.remove(&SyntaxKind::Version) { - let version = exactly_one!(versions, "versions"); - (&version).write(stream); + for child in element.children().expect("version statement children") { + (&child).write(stream); + stream.end_word(); } - stream.end_line(); +} - if !children.is_empty() { - todo!( - "unhandled children for version statement: {:#?}", - children.keys() - ); +/// Formats a [`LiteralString`](wdl_ast::v1::LiteralString). +pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal string children") { + match child.element().kind() { + SyntaxKind::SingleQuote => { + stream.push_literal_in_place_of_token( + child.element().as_token().expect("token"), + "\"".to_owned(), + ); + } + _ => { + (&child).write(stream); + } + } } } diff --git a/wdl-format/src/v1/import.rs b/wdl-format/src/v1/import.rs index fe3efa43b..88f317b5b 100644 --- a/wdl-format/src/v1/import.rs +++ b/wdl-format/src/v1/import.rs @@ -1,69 +1,24 @@ //! Formatting for imports. -use wdl_ast::SyntaxKind; - use crate::PreToken; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; -use crate::exactly_one; /// Formats an [`ImportAlias`](wdl_ast::v1::ImportAlias). pub fn format_import_alias(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("import alias children"); - - let alias_keyword = children.next().expect("alias keyword"); - (&alias_keyword).write(stream); - - stream.end_word(); - - let real_name = children.next().expect("ident"); - (&real_name).write(stream); - - stream.end_word(); - - let as_keyword = children.next().expect("`as` keyword"); - (&as_keyword).write(stream); - - stream.end_word(); - - let alias_name = children.next().expect("ident"); - (&alias_name).write(stream); - - stream.end_word(); - - if children.next().is_some() { - todo!("unhandled children for import alias"); + for child in element.children().expect("import alias children") { + (&child).write(stream); + stream.end_word(); } } /// Formats an [`ImportStatement`](wdl_ast::v1::ImportStatement). 
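`format_literal_string` above normalizes string delimiters by substituting a `"` literal wherever a single-quote token appears, leaving the pieces between the delimiters untouched. A string-level approximation of the same rule (it deliberately ignores escaping concerns, which the token-level version sidesteps by construction):

/// Rewrites a single-quoted literal as a double-quoted one.
fn normalize_quotes(literal: &str) -> String {
    match literal.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')) {
        Some(inner) => format!("\"{inner}\""),
        None => literal.to_string(), // already double-quoted (or not a literal)
    }
}

fn main() {
    assert_eq!(normalize_quotes("'fileA.wdl'"), "\"fileA.wdl\"");
    assert_eq!(normalize_quotes("\"already fine\""), "\"already fine\"");
}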
pub fn format_import_statement(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - if let Some(mut import_keywords) = children.remove(&SyntaxKind::ImportKeyword) { - let import_keyword = exactly_one!(import_keywords, "import keywords"); - (&import_keyword).write(stream); - } - - stream.end_word(); - - if let Some(mut string_literals) = children.remove(&SyntaxKind::LiteralStringNode) { - let string_literal = exactly_one!(string_literals, "string literals"); - (&string_literal).write(stream); - } - - stream.end_word(); - - if let Some(import_aliases) = children.remove(&SyntaxKind::ImportAliasNode) { - for import_alias in import_aliases { - (&import_alias).write(stream); - } + for child in element.children().expect("import statement children") { + (&child).write(stream); + stream.end_word(); } stream.end_line(); - - if !children.is_empty() { - todo!("unhandled children for import: {:#?}", children.keys()); - } } diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index 9f3635412..c4236b36a 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -14,10 +14,11 @@ use crate::exactly_one; pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children_by_kind(); - if let Some(mut keywords) = children.remove(&SyntaxKind::WorkflowKeyword) { - let keyword = exactly_one!(keywords, "workflow keywords"); - (&keyword).write(stream); - } + let mut keywords = children + .remove(&SyntaxKind::WorkflowKeyword) + .expect("workflow keywords"); + let keyword = exactly_one!(keywords, "workflow keywords"); + (&keyword).write(stream); stream.end_word(); diff --git a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl new file mode 100644 index 000000000..eb40eca13 --- /dev/null +++ b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl @@ -0,0 +1,26 @@ +version 1.1 + +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# fileA 1.1 +import # fileA 1.2 +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" # fileA 2.3 +# fileA 3.1 +as # fileA 3.2 +# fileA 4.1 +bar # fileA 4.2 +# fileA 5.1 +alias # fileA 5.2 +# fileA 6.1 +qux # fileA 6.2 +# fileA 7.1 +as # fileA 7.2 +# fileA 8.1 +Qux # fileA 8.2 +# this comment belongs to fileC +import "fileC.wdl" # also fileC + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl new file mode 100644 index 000000000..04cb521da --- /dev/null +++ b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -0,0 +1,15 @@ +version 1.0 + +import "fileB.wdl" as foo # fileB +import "fileC.wdl" # fileC +import # fileA 1 +"fileA.wdl" # fileA 2 +as # fileA 3 +bar # fileA 4 +alias # fileA 5 +qux # fileA 6 +as # fileA 7 +Qux # fileA 8 + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl new file mode 100644 index 000000000..d4578f4cf --- /dev/null +++ b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl @@ -0,0 +1,8 @@ +version 1.1 + +import "fileB.wdl" as foo +import "fileA.wdl" as bar alias cows as horses alias cats as dogs +import "fileC.wdl" alias qux as Qux + +workflow test { +} diff --git 
a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl new file mode 100644 index 000000000..29baa7042 --- /dev/null +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -0,0 +1,26 @@ +version 1.1 + +# this comment belongs to fileC +import "fileC.wdl" +# this comment belongs to fileB +import "fileB.wdl" as foo +# fileA 1 +import +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" +# fileA 3 +as +# fileA 4 +bar +# fileA 5 +alias +# fileA 6 +qux +# fileA 7 +as +# fileA 8 +Qux + +workflow test { +} From acf0f150bec36f1afe086e4012c02033a95dc873 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 7 Oct 2024 18:44:43 -0400 Subject: [PATCH 13/60] [WIP] --- wdl-format/src/v1.rs | 60 +++++++------ wdl-format/src/v1/task.rs | 67 ++++++-------- wdl-format/src/v1/workflow.rs | 74 ++++++---------- wdl-format/src/v1/workflow/call.rs | 88 ++++++------------- .../source.formatted.wdl | 4 +- .../source.formatted.wdl | 4 +- .../source.formatted.wdl | 3 +- .../source.formatted.wdl | 8 +- 8 files changed, 130 insertions(+), 178 deletions(-) diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index cc23be186..5ac0fa973 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -1,5 +1,6 @@ //! Formatting of WDL v1.x elements. +use wdl_ast::AstToken; use wdl_ast::SyntaxKind; pub mod import; @@ -10,45 +11,54 @@ use crate::PreToken; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; -use crate::exactly_one; /// Formats an [`Ast`](wdl_ast::Ast). pub fn format_ast(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); + let mut children = element.children().expect("AST children"); - if let Some(mut versions) = children.remove(&SyntaxKind::VersionStatementNode) { - let version = exactly_one!(versions, "version statements"); - - // TODO(clay): improve this by removing the reference. 
- (&version).write(stream); - } + let version_statement = children.next().expect("version statement"); + assert!(version_statement.element().kind() == SyntaxKind::VersionStatementNode); + (&version_statement).write(stream); stream.blank_line(); - if let Some(imports) = children.remove(&SyntaxKind::ImportStatementNode) { - for import in imports { - (&import).write(stream); + let mut imports = Vec::new(); + let mut remainder = Vec::new(); + + for child in children { + match child.element().kind() { + SyntaxKind::ImportStatementNode => imports.push(child), + _ => remainder.push(child), } } - stream.blank_line(); + imports.sort_by(|a, b| { + let a = a + .element() + .as_node() + .expect("import statement node") + .as_import_statement() + .expect("import statement"); + let b = b + .element() + .as_node() + .expect("import statement node") + .as_import_statement() + .expect("import statement"); + let a_uri = a.uri().text().expect("import uri"); + let b_uri = b.uri().text().expect("import uri"); + a_uri.as_str().cmp(b_uri.as_str()) + }); - if let Some(tasks) = children.remove(&SyntaxKind::TaskDefinitionNode) { - for task in tasks { - (&task).write(stream); - stream.blank_line(); - } + for import in imports { + (&import).write(stream); } - if let Some(workflows) = children.remove(&SyntaxKind::WorkflowDefinitionNode) { - for workflow in workflows { - (&workflow).write(stream); - stream.blank_line(); - } - } + stream.blank_line(); - if !children.is_empty() { - todo!("unhandled children for AST: {:#?}", children.keys()); + for child in remainder { + (&child).write(stream); + stream.blank_line(); } } diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index 08fabbe0a..f325c9df0 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -6,48 +6,35 @@ use crate::PreToken; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; -use crate::exactly_one; /// Formats a [`TaskDefinition`](wdl_ast::v1::TaskDefinition). pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - if let Some(mut keywords) = children.remove(&SyntaxKind::TaskKeyword) { - let keyword = exactly_one!(keywords, "task keywords"); - (&keyword).write(stream); - } - - stream.end_word(); - - if let Some(mut idents) = children.remove(&SyntaxKind::Ident) { - let ident = exactly_one!(idents, "idents"); - (&ident).write(stream); - } - - stream.end_word(); - - if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) { - let brace = exactly_one!(braces, "open braces"); - (&brace).write(stream); - } - - stream.end_line(); - stream.increment_indent(); - - // TODO: Implement task body formatting. 
- stream.decrement_indent(); - - if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) { - let brace = exactly_one!(braces, "closed braces"); - (&brace).write(stream); - } - - stream.end_line(); - - if !children.is_empty() { - todo!( - "unhandled children for task definition: {:#?}", - children.keys() - ); + for child in element.children().expect("task definition children") { + match child.element().kind() { + SyntaxKind::TaskKeyword => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::Ident => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::OpenBrace => { + (&child).write(stream); + stream.end_line(); + stream.increment_indent(); + } + SyntaxKind::CloseBrace => { + stream.decrement_indent(); + (&child).write(stream); + stream.end_line(); + } + _ => { + unreachable!( + "unexpected child in task definition: {:?}", + child.element().kind() + ); + } + } } } diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index c4236b36a..a46306bfc 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -8,54 +8,38 @@ use crate::PreToken; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; -use crate::exactly_one; /// Formats a [`WorkflowDefinition`](wdl_ast::v1::WorkflowDefinition). pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - let mut keywords = children - .remove(&SyntaxKind::WorkflowKeyword) - .expect("workflow keywords"); - let keyword = exactly_one!(keywords, "workflow keywords"); - (&keyword).write(stream); - - stream.end_word(); - - if let Some(mut idents) = children.remove(&SyntaxKind::Ident) { - let idents = exactly_one!(idents, "idents"); - (&idents).write(stream); - } - - stream.end_word(); - - if let Some(mut braces) = children.remove(&SyntaxKind::OpenBrace) { - let brace = exactly_one!(braces, "open braces"); - (&brace).write(stream); - } - - stream.end_line(); - stream.increment_indent(); - - if let Some(calls) = children.remove(&SyntaxKind::CallStatementNode) { - for call in calls { - (&call).write(stream); - stream.end_line(); + for child in element.children().expect("workflow definition children") { + match child.element().kind() { + SyntaxKind::WorkflowKeyword => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::Ident => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::OpenBrace => { + (&child).write(stream); + stream.end_line(); + stream.increment_indent(); + } + SyntaxKind::CallStatementNode => { + (&child).write(stream); + } + SyntaxKind::CloseBrace => { + stream.decrement_indent(); + (&child).write(stream); + stream.end_line(); + } + _ => { + unreachable!( + "unexpected child in workflow definition: {:?}", + child.element().kind() + ); + } } } - - stream.decrement_indent(); - - if let Some(mut braces) = children.remove(&SyntaxKind::CloseBrace) { - let brace = exactly_one!(braces, "closed braces"); - (&brace).write(stream); - stream.end_line(); - } - - if !children.is_empty() { - todo!( - "unhandled children for workflow definition: {:#?}", - children.keys() - ); - } } diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index 8526268c2..4018c44f1 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -6,72 +6,42 @@ use crate::PreToken; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; -use crate::exactly_one; /// Formats a 
[`CallStatement`](wdl_ast::v1::CallStatement). pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - if let Some(mut keywords) = children.remove(&SyntaxKind::CallKeyword) { - let keyword = exactly_one!(keywords, "call keywords"); - (&keyword).write(stream); - } - - stream.end_word(); - - if let Some(mut call_nodes) = children.remove(&SyntaxKind::CallTargetNode) { - let call_node = exactly_one!(call_nodes, "call target nodes"); - (&call_node).write(stream); - } - - stream.end_word(); - - if let Some(mut open_braces) = children.remove(&SyntaxKind::OpenBrace) { - let open_brace = exactly_one!(open_braces, "open braces"); - (&open_brace).write(stream); - } - - stream.end_word(); - - if let Some(mut close_braces) = children.remove(&SyntaxKind::CloseBrace) { - let close_brace = exactly_one!(close_braces, "close braces"); - (&close_brace).write(stream); - } - - stream.end_line(); - - if !children.is_empty() { - todo!( - "unhandled children for call statement: {:#?}", - children.keys() - ); + for child in element.children().expect("call statement children") { + match child.element().kind() { + SyntaxKind::CallKeyword => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::CallTargetNode => { + (&child).write(stream); + stream.end_word(); + } + SyntaxKind::OpenBrace => { + (&child).write(stream); + stream.end_line(); + stream.increment_indent(); + } + SyntaxKind::CloseBrace => { + stream.decrement_indent(); + (&child).write(stream); + stream.end_line(); + } + _ => { + unreachable!( + "unexpected child in call statement: {:?}", + child.element().kind() + ); + } + } } } /// Formats a [`CallTarget`](wdl_ast::v1::CallTarget). pub fn format_call_target(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children_by_kind(); - - if let Some(idents) = children.remove(&SyntaxKind::Ident) { - let mut idents = idents.into_iter(); - let first_ident = idents.next().expect("at least one ident"); - (&first_ident).write(stream); - - if let Some(mut dots) = children.remove(&SyntaxKind::Dot) { - let dot = exactly_one!(dots, "dots"); - (&dot).write(stream); - - let second_ident = idents.next().expect("second ident"); - (&second_ident).write(stream); - - assert!(idents.next().is_none(), "too many idents"); - } - } - - if !children.is_empty() { - todo!( - "unhandled children for call statement: {:#?}", - children.keys() - ); + for child in element.children().expect("call target children") { + (&child).write(stream); } } diff --git a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl index eb40eca13..ac12fc6b3 100644 --- a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl @@ -1,7 +1,5 @@ version 1.1 -# this comment belongs to fileB -import "fileB.wdl" as foo # also fileB # fileA 1.1 import # fileA 1.2 # fileA 2.1 @@ -19,6 +17,8 @@ qux # fileA 6.2 as # fileA 7.2 # fileA 8.1 Qux # fileA 8.2 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB # this comment belongs to fileC import "fileC.wdl" # also fileC diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl index 04cb521da..48c0b1c8d 100644 --- a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl +++ 
b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -1,7 +1,5 @@ version 1.0 -import "fileB.wdl" as foo # fileB -import "fileC.wdl" # fileC import # fileA 1 "fileA.wdl" # fileA 2 as # fileA 3 @@ -10,6 +8,8 @@ alias # fileA 5 qux # fileA 6 as # fileA 7 Qux # fileA 8 +import "fileB.wdl" as foo # fileB +import "fileC.wdl" # fileC workflow test { } diff --git a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl index d4578f4cf..b5d7fa424 100644 --- a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl @@ -1,7 +1,8 @@ version 1.1 -import "fileB.wdl" as foo import "fileA.wdl" as bar alias cows as horses alias cats as dogs + +import "fileB.wdl" as foo import "fileC.wdl" alias qux as Qux workflow test { diff --git a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl index 29baa7042..7b445a792 100644 --- a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -1,9 +1,5 @@ version 1.1 -# this comment belongs to fileC -import "fileC.wdl" -# this comment belongs to fileB -import "fileB.wdl" as foo # fileA 1 import # fileA 2.1 @@ -21,6 +17,10 @@ qux as # fileA 8 Qux +# this comment belongs to fileB +import "fileB.wdl" as foo +# this comment belongs to fileC +import "fileC.wdl" workflow test { } From d97580f514a488c930f4807cf93cfb3aa2aec0a4 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Tue, 8 Oct 2024 11:06:52 -0400 Subject: [PATCH 14/60] [WIP] --- wdl-format/src/lib.rs | 28 +- wdl-format/src/token/post.rs | 39 ++- wdl-format/src/token/pre.rs | 6 + wdl-format/src/v1.rs | 239 ++++++++++++++++++ wdl-format/src/v1/struct.rs | 75 ++++++ wdl-format/src/v1/task.rs | 99 +++++++- wdl-format/src/v1/workflow.rs | 86 +++++-- .../source.formatted.wdl | 30 +-- .../source.formatted.wdl | 14 +- .../source.formatted.wdl | 30 +-- .../interrupt_example/source.formatted.wdl | 13 + 11 files changed, 576 insertions(+), 83 deletions(-) create mode 100644 wdl-format/src/v1/struct.rs create mode 100644 wdl-format/tests/format/interrupt_example/source.formatted.wdl diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index 5ff6973cc..bc1bbb3dd 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -53,7 +53,7 @@ impl Writable for &FormatElement { AstNode::AdditionExpr(_) => todo!(), AstNode::ArrayType(_) => todo!(), AstNode::Ast(_) => v1::format_ast(self, stream), - AstNode::BoundDecl(_) => todo!(), + AstNode::BoundDecl(_) => v1::format_bound_decl(self, stream), AstNode::CallAfter(_) => todo!(), AstNode::CallAlias(_) => todo!(), AstNode::CallExpr(_) => todo!(), @@ -75,21 +75,21 @@ impl Writable for &FormatElement { AstNode::ImportStatement(_) => v1::import::format_import_statement(self, stream), AstNode::IndexExpr(_) => todo!(), AstNode::InequalityExpr(_) => todo!(), - AstNode::InputSection(_) => todo!(), + AstNode::InputSection(_) => v1::format_input_section(self, stream), AstNode::LessEqualExpr(_) => todo!(), AstNode::LessExpr(_) => todo!(), AstNode::LiteralArray(_) => todo!(), - AstNode::LiteralBoolean(_) => todo!(), - AstNode::LiteralFloat(_) => todo!(), + AstNode::LiteralBoolean(_) => v1::format_literal_boolean(self, stream), + AstNode::LiteralFloat(_) => 
v1::format_literal_float(self, stream), AstNode::LiteralHints(_) => todo!(), AstNode::LiteralHintsItem(_) => todo!(), AstNode::LiteralInput(_) => todo!(), AstNode::LiteralInputItem(_) => todo!(), - AstNode::LiteralInteger(_) => todo!(), + AstNode::LiteralInteger(_) => v1::format_literal_integer(self, stream), AstNode::LiteralMap(_) => todo!(), AstNode::LiteralMapItem(_) => todo!(), AstNode::LiteralNone(_) => todo!(), - AstNode::LiteralNull(_) => todo!(), + AstNode::LiteralNull(_) => v1::format_literal_null(self, stream), AstNode::LiteralObject(_) => todo!(), AstNode::LiteralObjectItem(_) => todo!(), AstNode::LiteralOutput(_) => todo!(), @@ -103,9 +103,9 @@ impl Writable for &FormatElement { AstNode::LogicalOrExpr(_) => todo!(), AstNode::MapType(_) => todo!(), AstNode::MetadataArray(_) => todo!(), - AstNode::MetadataObject(_) => todo!(), - AstNode::MetadataObjectItem(_) => todo!(), - AstNode::MetadataSection(_) => todo!(), + AstNode::MetadataObject(_) => v1::format_metadata_object(self, stream), + AstNode::MetadataObjectItem(_) => v1::format_metadata_object_item(self, stream), + AstNode::MetadataSection(_) => v1::format_metadata_section(self, stream), AstNode::ModuloExpr(_) => todo!(), AstNode::MultiplicationExpr(_) => todo!(), AstNode::NameRef(_) => todo!(), @@ -116,21 +116,23 @@ impl Writable for &FormatElement { AstNode::ParameterMetadataSection(_) => todo!(), AstNode::ParenthesizedExpr(_) => todo!(), AstNode::Placeholder(_) => todo!(), - AstNode::PrimitiveType(_) => todo!(), + AstNode::PrimitiveType(_) => v1::format_primitive_type(self, stream), AstNode::RequirementsItem(_) => todo!(), AstNode::RequirementsSection(_) => todo!(), AstNode::RuntimeItem(_) => todo!(), AstNode::RuntimeSection(_) => todo!(), AstNode::ScatterStatement(_) => todo!(), AstNode::SepOption(_) => todo!(), - AstNode::StructDefinition(_) => todo!(), + AstNode::StructDefinition(_) => { + v1::r#struct::format_struct_definition(self, stream) + } AstNode::SubtractionExpr(_) => todo!(), AstNode::TaskDefinition(_) => v1::task::format_task_definition(self, stream), AstNode::TaskHintsItem(_) => todo!(), AstNode::TaskHintsSection(_) => todo!(), AstNode::TrueFalseOption(_) => todo!(), - AstNode::TypeRef(_) => todo!(), - AstNode::UnboundDecl(_) => todo!(), + AstNode::TypeRef(_) => v1::format_type_ref(self, stream), + AstNode::UnboundDecl(_) => v1::format_unbound_decl(self, stream), AstNode::VersionStatement(_) => v1::format_version_statement(self, stream), AstNode::WorkflowDefinition(_) => { v1::workflow::format_workflow_definition(self, stream) diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index 81cc86d35..a21833f0f 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -74,6 +74,9 @@ pub struct Postprocessor { /// The current indentation level. indent_level: usize, + /// Whether the current line has been interrupted by trivia. + interrupted: bool, + /// Whether blank lines are allowed in the current context. 
blank_lines_allowed: LineSpacingPolicy, } @@ -85,7 +88,7 @@ impl Postprocessor { let mut stream = input.into_iter().peekable(); while let Some(token) = stream.next() { - self.step(token, stream.peek(), &mut output) + self.step(token, stream.peek(), &mut output); } self.trim_whitespace(&mut output); @@ -99,7 +102,7 @@ impl Postprocessor { pub fn step( &mut self, token: PreToken, - _next: Option<&PreToken>, + next: Option<&PreToken>, stream: &mut TokenStream, ) { match token { @@ -109,6 +112,7 @@ impl Postprocessor { stream.push(PostToken::Newline); } PreToken::LineEnd => { + self.interrupted = false; self.end_line(stream); } PreToken::WordEnd => { @@ -133,7 +137,19 @@ impl Postprocessor { self.blank_lines_allowed = policy; } PreToken::Literal(value, kind) => { - assert!(kind != SyntaxKind::Comment); + assert!(kind != SyntaxKind::Comment && kind != SyntaxKind::Whitespace); + if self.interrupted + && matches!( + kind, + SyntaxKind::OpenBrace + | SyntaxKind::OpenBracket + | SyntaxKind::OpenParen + | SyntaxKind::OpenHeredoc + ) + && stream.0.last() == Some(&PostToken::Indent) + { + stream.0.pop(); + } stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; } @@ -149,6 +165,9 @@ impl Postprocessor { } Trivia::Comment(comment) => match comment { Comment::Preceding(value) => { + if stream.0.last() != Some(&PostToken::Newline) { + self.interrupted = true; + } self.end_line(stream); stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; @@ -156,6 +175,11 @@ impl Postprocessor { } Comment::Inline(value) => { assert!(self.position == LinePosition::MiddleOfLine); + if let Some(next) = next { + if next != &PreToken::LineEnd { + self.interrupted = true; + } + } self.trim_last_line(stream); stream.push(PostToken::Space); stream.push(PostToken::Space); @@ -201,7 +225,14 @@ impl Postprocessor { /// [`LinePosition::StartOfLine`]. fn indent(&self, stream: &mut TokenStream) { assert!(self.position == LinePosition::StartOfLine); - for _ in 0..self.indent_level { + + let level = if self.interrupted { + self.indent_level + 1 + } else { + self.indent_level + }; + + for _ in 0..level { stream.push(PostToken::Indent); } } diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 0fa1d422c..0bcb6003b 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -202,4 +202,10 @@ impl TokenStream { .push(PreToken::Literal(replacement, token.syntax().kind())); self.push_inline_trivia(token); } + + /// Pushes a literal string into the stream. + /// This will not insert any trivia. + pub fn push_literal(&mut self, value: String, kind: SyntaxKind) { + self.0.push(PreToken::Literal(value, kind)); + } } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 5ac0fa973..d0753026a 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -4,6 +4,7 @@ use wdl_ast::AstToken; use wdl_ast::SyntaxKind; pub mod import; +pub mod r#struct; pub mod task; pub mod workflow; @@ -87,3 +88,241 @@ pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream

) { + let mut children = element.children().expect("literal boolean children"); + let bool = children.next().expect("literal boolean token"); + (&bool).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralInteger`](wdl_ast::v1::LiteralInteger). +pub fn format_literal_integer(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal integer children") { + (&child).write(stream); + } +} +/// Formats a [`LiteralFloat`](wdl_ast::v1::LiteralFloat). +pub fn format_literal_float(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal float children") { + (&child).write(stream); + } +} + +/// Formats a [`LiteralNull`](wdl_ast::v1::LiteralNull). +pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal null children"); + let null = children.next().expect("literal null token"); + (&null).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`PrimitiveType`](wdl_ast::v1::PrimitiveType). +pub fn format_primitive_type(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("primitive type children"); + let t = children.next().expect("primitive type token"); + (&t).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`TypeRef`](wdl_ast::v1::TypeRef). +pub fn format_type_ref(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("type ref children"); + let t = children.next().expect("type ref type"); + (&t).write(stream); + assert!(children.next().is_none()); +} + +/// Formats an [`UnboundDecl`](wdl_ast::v1::UnboundDecl). +pub fn format_unbound_decl(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("unbound decl children") { + (&child).write(stream); + stream.end_word(); + } + stream.end_line(); +} + +/// Formats a [`BoundDecl`](wdl_ast::v1::BoundDecl). +pub fn format_bound_decl(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("bound decl children") { + (&child).write(stream); + stream.end_word(); + } + stream.end_line(); +} + +/// Formats an [`InputSection`](wdl_ast::v1::InputSection). +pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("input section children"); + + let input_keyword = children.next().expect("input section input keyword"); + assert!(input_keyword.element().kind() == SyntaxKind::InputKeyword); + (&input_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("input section open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut close_brace = None; + let inputs = children + .filter_map(|child| { + if child.element().kind() == SyntaxKind::BoundDeclNode { + Some(child) + } else { + assert!(child.element().kind() == SyntaxKind::CloseBrace); + close_brace = Some(child.clone()); + None + } + }) + .collect::>(); + + // TODO: sort inputs + for input in inputs { + (&input).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("input section close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`MetadataObject`](wdl_ast::v1::MetadataObject). 
+pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("metadata object children"); + + let open_brace = children.next().expect("metadata object open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut close_brace = None; + let mut commas = Vec::new(); + let items = children + .filter(|child| { + if child.element().kind() == SyntaxKind::MetadataObjectItemNode { + true + } else if child.element().kind() == SyntaxKind::Comma { + commas.push(child.to_owned()); + false + } else { + assert!(child.element().kind() == SyntaxKind::CloseBrace); + close_brace = Some(child.to_owned()); + false + } + }) + .collect::>(); + + let mut commas = commas.iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (&comma).write(stream); + stream.end_line(); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + stream.end_line(); + } + } +} + +/// Formats a [`MetadataObjectItem`](wdl_ast::v1::MetadataObjectItem). +pub fn format_metadata_object_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("metadata object item children"); + + let key = children.next().expect("metadata object item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("metadata object item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("metadata object item value"); + (&value).write(stream); + stream.end_line(); + + assert!(children.next().is_none()); +} + +/// Formats a [MetadataSection](wdl_ast::v1::MetadataSection). +pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("meta section children"); + + let meta_keyword = children.next().expect("meta keyword"); + assert!(meta_keyword.element().kind() == SyntaxKind::MetaKeyword); + (&meta_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("metadata section open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut close_brace = None; + let metadata = children + .filter_map(|child| { + if child.element().kind() == SyntaxKind::MetadataObjectItemNode { + Some(child) + } else { + assert!(child.element().kind() == SyntaxKind::CloseBrace); + close_brace = Some(child.clone()); + None + } + }) + .collect::>(); + + for item in metadata { + (&item).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("metadata section close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`ParameterMetadataSection`](wdl_ast::v1::ParameterMetadataSection). 
+pub fn format_parameter_metadata_section( + element: &FormatElement, + stream: &mut TokenStream, +) { + let mut children = element.children().expect("parameter meta section children"); + + let parameter_meta_keyword = children.next().expect("parameter meta keyword"); + assert!(parameter_meta_keyword.element().kind() == SyntaxKind::ParameterMetaKeyword); + (¶meter_meta_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("parameter metadata section open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut close_brace = None; + let metadata = children + .filter_map(|child| { + if child.element().kind() == SyntaxKind::MetadataObjectItemNode { + Some(child) + } else { + assert!(child.element().kind() == SyntaxKind::CloseBrace); + close_brace = Some(child.clone()); + None + } + }) + .collect::>(); + + for item in metadata { + (&item).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("parameter metadata section close brace")).write(stream); + stream.end_line(); +} diff --git a/wdl-format/src/v1/struct.rs b/wdl-format/src/v1/struct.rs new file mode 100644 index 000000000..faeb8e8e0 --- /dev/null +++ b/wdl-format/src/v1/struct.rs @@ -0,0 +1,75 @@ +//! Formatting for structs. + +use wdl_ast::SyntaxKind; + +use crate::PreToken; +use crate::TokenStream; +use crate::Writable as _; +use crate::element::FormatElement; + +/// Formats a [`StructDefinition`](wdl_ast::v1::StructDefinition). +pub fn format_struct_definition(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("struct definition children"); + + let struct_keyword = children.next().expect("struct keyword"); + assert!(struct_keyword.element().kind() == SyntaxKind::StructKeyword); + (&struct_keyword).write(stream); + stream.end_word(); + + let name = children.next().expect("struct name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut meta = None; + let mut parameter_meta = None; + let mut members = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::MetadataSectionNode => { + meta = Some(child.clone()); + } + SyntaxKind::ParameterMetadataSectionNode => { + parameter_meta = Some(child.clone()); + } + SyntaxKind::UnboundDeclNode => { + members.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in struct definition: {:?}", + child.element().kind() + ); + } + } + } + + if let Some(meta) = meta { + (&meta).write(stream); + stream.blank_line(); + } + + if let Some(parameter_meta) = parameter_meta { + (¶meter_meta).write(stream); + stream.blank_line(); + } + + for member in members { + (&member).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("struct definition close brace")).write(stream); + stream.end_line(); +} diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index f325c9df0..6c7b6d84c 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -9,25 +9,58 @@ use crate::element::FormatElement; /// Formats a [`TaskDefinition`](wdl_ast::v1::TaskDefinition). 
pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("task definition children") { + let mut children = element.children().expect("task definition children"); + + let task_keyword = children.next().expect("task keyword"); + assert!(task_keyword.element().kind() == SyntaxKind::TaskKeyword); + (&task_keyword).write(stream); + stream.end_word(); + + let name = children.next().expect("task name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut meta = None; + let mut parameter_meta = None; + let mut input = None; + let mut body = Vec::new(); + let mut runtime = None; + let mut command = None; + let mut output = None; + let mut close_brace = None; + + for child in children { match child.element().kind() { - SyntaxKind::TaskKeyword => { - (&child).write(stream); - stream.end_word(); + SyntaxKind::InputSectionNode => { + input = Some(child.clone()); + } + SyntaxKind::MetadataSectionNode => { + meta = Some(child.clone()); } - SyntaxKind::Ident => { - (&child).write(stream); - stream.end_word(); + SyntaxKind::ParameterMetadataSectionNode => { + parameter_meta = Some(child.clone()); } - SyntaxKind::OpenBrace => { - (&child).write(stream); - stream.end_line(); - stream.increment_indent(); + SyntaxKind::RuntimeSectionNode => { + runtime = Some(child.clone()); + } + SyntaxKind::CommandSectionNode => { + command = Some(child.clone()); + } + SyntaxKind::OutputSectionNode => { + output = Some(child.clone()); + } + SyntaxKind::BoundDeclNode => { + body.push(child.clone()); } SyntaxKind::CloseBrace => { - stream.decrement_indent(); - (&child).write(stream); - stream.end_line(); + close_brace = Some(child.clone()); } _ => { unreachable!( @@ -37,4 +70,42 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< } } } + + if let Some(meta) = meta { + (&meta).write(stream); + stream.blank_line(); + } + + if let Some(parameter_meta) = parameter_meta { + (¶meter_meta).write(stream); + stream.blank_line(); + } + + if let Some(input) = input { + (&input).write(stream); + stream.blank_line(); + } + + for child in body { + (&child).write(stream); + } + + if let Some(command) = command { + (&command).write(stream); + stream.blank_line(); + } + + if let Some(output) = output { + (&output).write(stream); + stream.blank_line(); + } + + if let Some(runtime) = runtime { + (&runtime).write(stream); + stream.blank_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("task close brace")).write(stream); + stream.end_line(); } diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index a46306bfc..08686309d 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -11,28 +11,56 @@ use crate::element::FormatElement; /// Formats a [`WorkflowDefinition`](wdl_ast::v1::WorkflowDefinition). 
pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("workflow definition children") { + let mut children = element.children().expect("workflow definition children"); + + let workflow_keyword = children.next().expect("workflow keyword"); + assert!(workflow_keyword.element().kind() == SyntaxKind::WorkflowKeyword); + (&workflow_keyword).write(stream); + stream.end_word(); + + let name = children.next().expect("workflow name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + let mut meta = None; + let mut parameter_meta = None; + let mut input = None; + let mut body = Vec::new(); + let mut output = None; + let mut close_brace = None; + + for child in children { match child.element().kind() { - SyntaxKind::WorkflowKeyword => { - (&child).write(stream); - stream.end_word(); + SyntaxKind::MetadataSectionNode => { + meta = Some(child.clone()); } - SyntaxKind::Ident => { - (&child).write(stream); - stream.end_word(); + SyntaxKind::ParameterMetadataSectionNode => { + parameter_meta = Some(child.clone()); } - SyntaxKind::OpenBrace => { - (&child).write(stream); - stream.end_line(); - stream.increment_indent(); + SyntaxKind::InputSectionNode => { + input = Some(child.clone()); + } + SyntaxKind::BoundDeclNode => { + body.push(child.clone()); } SyntaxKind::CallStatementNode => { - (&child).write(stream); + body.push(child.clone()); + } + SyntaxKind::ConditionalStatementNode => { + body.push(child.clone()); + } + SyntaxKind::OutputSectionNode => { + output = Some(child.clone()); } SyntaxKind::CloseBrace => { - stream.decrement_indent(); - (&child).write(stream); - stream.end_line(); + close_brace = Some(child.clone()); } _ => { unreachable!( @@ -42,4 +70,32 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr } } } + + if let Some(meta) = meta { + (&meta).write(stream); + stream.blank_line(); + } + + if let Some(parameter_meta) = parameter_meta { + (¶meter_meta).write(stream); + stream.blank_line(); + } + + if let Some(input) = input { + (&input).write(stream); + stream.blank_line(); + } + + for child in body { + (&child).write(stream); + } + + if let Some(output) = output { + (&output).write(stream); + stream.blank_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("workflow close brace")).write(stream); + stream.end_line(); } diff --git a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl index ac12fc6b3..f5eb20569 100644 --- a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl @@ -2,21 +2,21 @@ version 1.1 # fileA 1.1 import # fileA 1.2 -# fileA 2.1 -# fileA 2.2 -"fileA.wdl" # fileA 2.3 -# fileA 3.1 -as # fileA 3.2 -# fileA 4.1 -bar # fileA 4.2 -# fileA 5.1 -alias # fileA 5.2 -# fileA 6.1 -qux # fileA 6.2 -# fileA 7.1 -as # fileA 7.2 -# fileA 8.1 -Qux # fileA 8.2 + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" # fileA 2.3 + # fileA 3.1 + as # fileA 3.2 + # fileA 4.1 + bar # fileA 4.2 + # fileA 5.1 + alias # fileA 5.2 + # fileA 6.1 + qux # fileA 6.2 + # fileA 7.1 + as # fileA 7.2 + # fileA 8.1 + Qux # fileA 8.2 # this comment belongs to 
fileB import "fileB.wdl" as foo # also fileB # this comment belongs to fileC diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl index 48c0b1c8d..cb225b417 100644 --- a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -1,13 +1,13 @@ version 1.0 import # fileA 1 -"fileA.wdl" # fileA 2 -as # fileA 3 -bar # fileA 4 -alias # fileA 5 -qux # fileA 6 -as # fileA 7 -Qux # fileA 8 + "fileA.wdl" # fileA 2 + as # fileA 3 + bar # fileA 4 + alias # fileA 5 + qux # fileA 6 + as # fileA 7 + Qux # fileA 8 import "fileB.wdl" as foo # fileB import "fileC.wdl" # fileC diff --git a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl index 7b445a792..ab1d380af 100644 --- a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -2,21 +2,21 @@ version 1.1 # fileA 1 import -# fileA 2.1 -# fileA 2.2 -"fileA.wdl" -# fileA 3 -as -# fileA 4 -bar -# fileA 5 -alias -# fileA 6 -qux -# fileA 7 -as -# fileA 8 -Qux + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" + # fileA 3 + as + # fileA 4 + bar + # fileA 5 + alias + # fileA 6 + qux + # fileA 7 + as + # fileA 8 + Qux # this comment belongs to fileB import "fileB.wdl" as foo # this comment belongs to fileC diff --git a/wdl-format/tests/format/interrupt_example/source.formatted.wdl b/wdl-format/tests/format/interrupt_example/source.formatted.wdl new file mode 100644 index 000000000..4797ab7c7 --- /dev/null +++ b/wdl-format/tests/format/interrupt_example/source.formatted.wdl @@ -0,0 +1,13 @@ +version # interrupt + 1.2 # how far should '1.2' be indented? + +workflow + # interrupt + test # should this be indented? + # interrupt +{ + meta # interrupt + { # how far should this bracket be indented? 
+ } + +} From 343353af345d44571fe8c45db6174bef55c556fd Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Wed, 9 Oct 2024 09:30:38 -0400 Subject: [PATCH 15/60] [WIP] tests: seaseq case formats --- wdl-format/src/lib.rs | 80 +- wdl-format/src/token/post.rs | 24 +- wdl-format/src/token/pre.rs | 8 +- wdl-format/src/v1.rs | 172 ++-- wdl-format/src/v1/expr.rs | 313 ++++++ wdl-format/src/v1/struct.rs | 67 ++ wdl-format/src/v1/workflow.rs | 85 +- wdl-format/src/v1/workflow/call.rs | 147 ++- .../format/seaseq-case/source.formatted.wdl | 939 ++++++++++++++++++ 9 files changed, 1706 insertions(+), 129 deletions(-) create mode 100644 wdl-format/src/v1/expr.rs create mode 100644 wdl-format/tests/format/seaseq-case/source.formatted.wdl diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index bc1bbb3dd..e80d6da74 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -49,43 +49,47 @@ impl Writable for &FormatElement { fn write(&self, stream: &mut TokenStream) { match self.element() { Element::Node(node) => match node { - AstNode::AccessExpr(_) => todo!(), - AstNode::AdditionExpr(_) => todo!(), - AstNode::ArrayType(_) => todo!(), + AstNode::AccessExpr(_) => v1::expr::format_access_expr(self, stream), + AstNode::AdditionExpr(_) => v1::expr::format_addition_expr(self, stream), + AstNode::ArrayType(_) => v1::format_array_type(self, stream), AstNode::Ast(_) => v1::format_ast(self, stream), AstNode::BoundDecl(_) => v1::format_bound_decl(self, stream), - AstNode::CallAfter(_) => todo!(), - AstNode::CallAlias(_) => todo!(), - AstNode::CallExpr(_) => todo!(), - AstNode::CallInputItem(_) => todo!(), + AstNode::CallAfter(_) => v1::workflow::call::format_call_after(self, stream), + AstNode::CallAlias(_) => v1::workflow::call::format_call_alias(self, stream), + AstNode::CallExpr(_) => v1::expr::format_call_expr(self, stream), + AstNode::CallInputItem(_) => { + v1::workflow::call::format_call_input_item(self, stream) + } AstNode::CallStatement(_) => { v1::workflow::call::format_call_statement(self, stream) } AstNode::CallTarget(_) => v1::workflow::call::format_call_target(self, stream), AstNode::CommandSection(_) => todo!(), - AstNode::ConditionalStatement(_) => todo!(), + AstNode::ConditionalStatement(_) => { + v1::workflow::format_conditional_statement(self, stream) + } AstNode::DefaultOption(_) => todo!(), AstNode::DivisionExpr(_) => todo!(), - AstNode::EqualityExpr(_) => todo!(), + AstNode::EqualityExpr(_) => v1::expr::format_equality_expr(self, stream), AstNode::ExponentiationExpr(_) => todo!(), - AstNode::GreaterEqualExpr(_) => todo!(), - AstNode::GreaterExpr(_) => todo!(), - AstNode::IfExpr(_) => todo!(), + AstNode::GreaterEqualExpr(_) => v1::expr::format_greater_equal_expr(self, stream), + AstNode::GreaterExpr(_) => v1::expr::format_greater_expr(self, stream), + AstNode::IfExpr(_) => v1::expr::format_if_expr(self, stream), AstNode::ImportAlias(_) => v1::import::format_import_alias(self, stream), AstNode::ImportStatement(_) => v1::import::format_import_statement(self, stream), - AstNode::IndexExpr(_) => todo!(), - AstNode::InequalityExpr(_) => todo!(), + AstNode::IndexExpr(_) => v1::expr::format_index_expr(self, stream), + AstNode::InequalityExpr(_) => v1::expr::format_inequality_expr(self, stream), AstNode::InputSection(_) => v1::format_input_section(self, stream), - AstNode::LessEqualExpr(_) => todo!(), - AstNode::LessExpr(_) => todo!(), - AstNode::LiteralArray(_) => todo!(), - AstNode::LiteralBoolean(_) => v1::format_literal_boolean(self, stream), - AstNode::LiteralFloat(_) => 
v1::format_literal_float(self, stream), + AstNode::LessEqualExpr(_) => v1::expr::format_less_equal_expr(self, stream), + AstNode::LessExpr(_) => v1::expr::format_less_expr(self, stream), + AstNode::LiteralArray(_) => v1::expr::format_literal_array(self, stream), + AstNode::LiteralBoolean(_) => v1::expr::format_literal_boolean(self, stream), + AstNode::LiteralFloat(_) => v1::expr::format_literal_float(self, stream), AstNode::LiteralHints(_) => todo!(), AstNode::LiteralHintsItem(_) => todo!(), AstNode::LiteralInput(_) => todo!(), AstNode::LiteralInputItem(_) => todo!(), - AstNode::LiteralInteger(_) => v1::format_literal_integer(self, stream), + AstNode::LiteralInteger(_) => v1::expr::format_literal_integer(self, stream), AstNode::LiteralMap(_) => todo!(), AstNode::LiteralMapItem(_) => todo!(), AstNode::LiteralNone(_) => todo!(), @@ -95,33 +99,41 @@ impl Writable for &FormatElement { AstNode::LiteralOutput(_) => todo!(), AstNode::LiteralOutputItem(_) => todo!(), AstNode::LiteralPair(_) => todo!(), - AstNode::LiteralString(_) => v1::format_literal_string(self, stream), - AstNode::LiteralStruct(_) => todo!(), - AstNode::LiteralStructItem(_) => todo!(), - AstNode::LogicalAndExpr(_) => todo!(), - AstNode::LogicalNotExpr(_) => todo!(), - AstNode::LogicalOrExpr(_) => todo!(), + AstNode::LiteralString(_) => v1::expr::format_literal_string(self, stream), + AstNode::LiteralStruct(_) => v1::r#struct::format_literal_struct(self, stream), + AstNode::LiteralStructItem(_) => { + v1::r#struct::format_literal_struct_item(self, stream) + } + AstNode::LogicalAndExpr(_) => v1::expr::format_logical_and_expr(self, stream), + AstNode::LogicalNotExpr(_) => v1::expr::format_logical_not_expr(self, stream), + AstNode::LogicalOrExpr(_) => v1::expr::format_logical_or_expr(self, stream), AstNode::MapType(_) => todo!(), - AstNode::MetadataArray(_) => todo!(), + AstNode::MetadataArray(_) => v1::format_metadata_array(self, stream), AstNode::MetadataObject(_) => v1::format_metadata_object(self, stream), AstNode::MetadataObjectItem(_) => v1::format_metadata_object_item(self, stream), AstNode::MetadataSection(_) => v1::format_metadata_section(self, stream), AstNode::ModuloExpr(_) => todo!(), - AstNode::MultiplicationExpr(_) => todo!(), - AstNode::NameRef(_) => todo!(), - AstNode::NegationExpr(_) => todo!(), - AstNode::OutputSection(_) => todo!(), + AstNode::MultiplicationExpr(_) => { + v1::expr::format_multiplication_expr(self, stream) + } + AstNode::NameRef(_) => v1::expr::format_name_ref(self, stream), + AstNode::NegationExpr(_) => v1::expr::format_negation_expr(self, stream), + AstNode::OutputSection(_) => v1::format_output_section(self, stream), AstNode::PairType(_) => todo!(), AstNode::ObjectType(_) => todo!(), - AstNode::ParameterMetadataSection(_) => todo!(), - AstNode::ParenthesizedExpr(_) => todo!(), + AstNode::ParameterMetadataSection(_) => { + v1::format_parameter_metadata_section(self, stream) + } + AstNode::ParenthesizedExpr(_) => v1::expr::format_parenthesized_expr(self, stream), AstNode::Placeholder(_) => todo!(), AstNode::PrimitiveType(_) => v1::format_primitive_type(self, stream), AstNode::RequirementsItem(_) => todo!(), AstNode::RequirementsSection(_) => todo!(), AstNode::RuntimeItem(_) => todo!(), AstNode::RuntimeSection(_) => todo!(), - AstNode::ScatterStatement(_) => todo!(), + AstNode::ScatterStatement(_) => { + v1::workflow::format_scatter_statement(self, stream) + } AstNode::SepOption(_) => todo!(), AstNode::StructDefinition(_) => { v1::r#struct::format_struct_definition(self, stream) diff --git 
a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index a21833f0f..c69352d5a 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -107,9 +107,7 @@ impl Postprocessor { ) { match token { PreToken::BlankLine => { - self.trim_whitespace(stream); - stream.push(PostToken::Newline); - stream.push(PostToken::Newline); + self.blank_line(stream); } PreToken::LineEnd => { self.interrupted = false; @@ -156,16 +154,17 @@ impl Postprocessor { PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => { if self.blank_lines_allowed == LineSpacingPolicy::Yes { - self.trim_whitespace(stream); - stream.push(PostToken::Newline); - stream.push(PostToken::Newline); + self.blank_line(stream); } else { todo!("handle line spacing policy") } } Trivia::Comment(comment) => match comment { Comment::Preceding(value) => { - if stream.0.last() != Some(&PostToken::Newline) { + if !matches!( + stream.0.last(), + Some(&PostToken::Newline) | Some(&PostToken::Indent) + ) { self.interrupted = true; } self.end_line(stream); @@ -206,7 +205,7 @@ impl Postprocessor { stream.trim_while(|token| matches!(token, PostToken::Space | PostToken::Indent)); } - /// Ends the current line. + /// Ends the current line without resetting the interrupted flag. /// /// Removes any trailing spaces or indents and adds a newline only if state /// is not [`LinePosition::StartOfLine`]. State is then set to @@ -236,4 +235,13 @@ impl Postprocessor { stream.push(PostToken::Indent); } } + + /// Creates a blank line and then indents. + fn blank_line(&mut self, stream: &mut TokenStream) { + self.trim_whitespace(stream); + stream.push(PostToken::Newline); + stream.push(PostToken::Newline); + self.position = LinePosition::StartOfLine; + self.indent(stream); + } } diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 0bcb6003b..0455a8940 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -121,13 +121,17 @@ impl TokenStream { self.0.push(PreToken::WordEnd); } - /// Inserts an indent start token to the stream. + /// Inserts an indent start token to the stream. This will also end the + /// current line. pub fn increment_indent(&mut self) { + self.end_line(); self.0.push(PreToken::IndentStart); } - /// Inserts an indent end token to the stream. + /// Inserts an indent end token to the stream. This will also end the + /// current line. pub fn decrement_indent(&mut self) { + self.end_line(); self.0.push(PreToken::IndentEnd); } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index d0753026a..1a6bca77f 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -3,6 +3,7 @@ use wdl_ast::AstToken; use wdl_ast::SyntaxKind; +pub mod expr; pub mod import; pub mod r#struct; pub mod task; @@ -72,44 +73,6 @@ pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStrea stream.end_line(); } -/// Formats a [`LiteralString`](wdl_ast::v1::LiteralString). -pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("literal string children") { - match child.element().kind() { - SyntaxKind::SingleQuote => { - stream.push_literal_in_place_of_token( - child.element().as_token().expect("token"), - "\"".to_owned(), - ); - } - _ => { - (&child).write(stream); - } - } - } -} - -/// Formats a [`LiteralBoolean`](wdl_ast::v1::LiteralBoolean). 
-pub fn format_literal_boolean(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("literal boolean children"); - let bool = children.next().expect("literal boolean token"); - (&bool).write(stream); - assert!(children.next().is_none()); -} - -/// Formats a [`LiteralInteger`](wdl_ast::v1::LiteralInteger). -pub fn format_literal_integer(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("literal integer children") { - (&child).write(stream); - } -} -/// Formats a [`LiteralFloat`](wdl_ast::v1::LiteralFloat). -pub fn format_literal_float(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("literal float children") { - (&child).write(stream); - } -} - /// Formats a [`LiteralNull`](wdl_ast::v1::LiteralNull). pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("literal null children"); @@ -120,10 +83,16 @@ pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream

) {
-    let mut children = element.children().expect("primitive type children");
-    let t = children.next().expect("primitive type token");
-    (&t).write(stream);
-    assert!(children.next().is_none());
+    for child in element.children().expect("primitive type children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats an [`ArrayType`](wdl_ast::v1::ArrayType).
+pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("array type children") {
+        (&child).write(stream);
+    }
 }
 
 /// Formats a [`TypeRef`](wdl_ast::v1::TypeRef).
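
The rewritten `format_primitive_type` and the new `format_array_type` above drop the exactly-one-child bookkeeping and simply emit every child in source order. A minimal sketch of that child-walking pattern (toy types standing in for the crate's `FormatElement` and `TokenStream`, not the real API):

    // Toy stand-ins for the child-walking pattern; not the wdl-format API.
    enum ToyElement {
        Token(&'static str),
        Node(Vec<ToyElement>),
    }

    fn write(element: &ToyElement, out: &mut String) {
        match element {
            ToyElement::Token(text) => out.push_str(text),
            // No arity assumptions: emit children in source order, recursing
            // into nested nodes (e.g. the inner type of `Array[...]`).
            ToyElement::Node(children) => {
                for child in children {
                    write(child, out);
                }
            }
        }
    }

    fn main() {
        // `Array[Array[String]]+` as a toy parse tree.
        let inner = ToyElement::Node(vec![
            ToyElement::Token("Array"),
            ToyElement::Token("["),
            ToyElement::Token("String"),
            ToyElement::Token("]"),
        ]);
        let ty = ToyElement::Node(vec![
            ToyElement::Token("Array"),
            ToyElement::Token("["),
            inner,
            ToyElement::Token("]"),
            ToyElement::Token("+"),
        ]);
        let mut out = String::new();
        write(&ty, &mut out);
        assert_eq!(out, "Array[Array[String]]+");
    }

Because brackets, the inner type, and any postfix qualifier are all just children of the type node, the same loop covers `Array[String]`, nested arrays, and `+`/`?` qualifiers without per-shape code.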
@@ -164,18 +133,20 @@ pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream<
@@ -190,6 +161,47 @@ pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream<
+/// Formats a [`MetadataArray`](wdl_ast::v1::MetadataArray).
+pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("metadata array children");
+
+    let open_bracket = children.next().expect("metadata array open bracket");
+    assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket);
+    (&open_bracket).write(stream);
+    stream.increment_indent();
+
+    let mut close_bracket = None;
+    let mut commas = Vec::new();
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::CloseBracket {
+                close_bracket = Some(child.to_owned());
+                false
+            } else if child.element().kind() == SyntaxKind::Comma {
+                commas.push(child.to_owned());
+                false
+            } else {
+                true
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+            stream.end_line();
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+            stream.end_line();
+        }
+    }
+
+    stream.decrement_indent();
+    (&close_bracket.expect("metadata array close bracket")).write(stream);
+}
+
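Note: `format_metadata_array` always breaks array elements onto their own lines and synthesizes a trailing comma whenever the source omits one. A minimal sketch of the resulting shape, assuming a hypothetical one-line input `authors: ["Jane", "Joe"]` (not one of the test fixtures):

    version 1.0

    workflow example {
        meta {
            authors: [
                "Jane",
                "Joe",
            ]
        }
    }
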
 /// Formats a [`MetadataObject`](wdl_ast::v1::MetadataObject).
 pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream) {
     let mut children = element.children().expect("metadata object children");
@@ -197,7 +209,6 @@ pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream<
     let open_brace = children.next().expect("metadata object open brace");
     assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
     (&open_brace).write(stream);
-    stream.end_line();
     stream.increment_indent();
 
     let mut close_brace = None;
@@ -221,13 +232,16 @@ pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream<
     for item in items {
         (&item).write(stream);
         if let Some(comma) = commas.next() {
-            (&comma).write(stream);
+            (comma).write(stream);
             stream.end_line();
         } else {
             stream.push_literal(",".to_string(), SyntaxKind::Comma);
             stream.end_line();
         }
     }
+
+    stream.decrement_indent();
+    (&close_brace.expect("metadata object close brace")).write(stream);
 }
 
 /// Formats a [`MetadataObjectItem`](wdl_ast::v1::MetadataObjectItem).
@@ -245,7 +259,6 @@ pub fn format_metadata_object_item(element: &FormatElement, stream: &mut TokenSt
 
     let value = children.next().expect("metadata object item value");
     (&value).write(stream);
-    stream.end_line();
 
     assert!(children.next().is_none());
 }
@@ -262,24 +275,24 @@ pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream
     let open_brace = children.next().expect("metadata section open brace");
     assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
     (&open_brace).write(stream);
-    stream.end_line();
     stream.increment_indent();
 
     let mut close_brace = None;
-    let metadata = children
-        .filter_map(|child| {
+    let items = children
+        .filter(|child| {
             if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
-                Some(child)
+                true
             } else {
                 assert!(child.element().kind() == SyntaxKind::CloseBrace);
-                close_brace = Some(child.clone());
-                None
+                close_brace = Some(child.to_owned());
+                false
             }
         })
        .collect::<Vec<_>>();
 
-    for item in metadata {
+    for item in items {
         (&item).write(stream);
+        stream.end_line();
     }
 
     stream.decrement_indent();
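Note: with the `stream.end_line()` added above, every item in a `meta` section lands on its own line; top-level meta items take no separators, while nested metadata objects remain comma-separated. A hypothetical sketch of the output shape (section shown in isolation):

    meta {
        title: "Example"
        details: {
            contactOrg: "Example Org",
        }
    }
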
@@ -299,30 +312,57 @@ pub fn format_parameter_metadata_section(
     (¶meter_meta_keyword).write(stream);
     stream.end_word();
 
-    let open_brace = children.next().expect("parameter metadata section open brace");
+    let open_brace = children
+        .next()
+        .expect("parameter metadata section open brace");
     assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
     (&open_brace).write(stream);
-    stream.end_line();
     stream.increment_indent();
 
     let mut close_brace = None;
-    let metadata = children
-        .filter_map(|child| {
+    let items = children
+        .filter(|child| {
             if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
-                Some(child)
+                true
             } else {
                 assert!(child.element().kind() == SyntaxKind::CloseBrace);
-                close_brace = Some(child.clone());
-                None
+                close_brace = Some(child.to_owned());
+                false
             }
         })
        .collect::<Vec<_>>();
 
-    for item in metadata {
+    for item in items {
         (&item).write(stream);
+        stream.end_line();
     }
 
     stream.decrement_indent();
     (&close_brace.expect("parameter metadata section close brace")).write(stream);
     stream.end_line();
 }
+
+/// Formats an [`OutputSection`](wdl_ast::v1::OutputSection).
+pub fn format_output_section(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("output section children");
+
+    let output_keyword = children.next().expect("output keyword");
+    assert!(output_keyword.element().kind() == SyntaxKind::OutputKeyword);
+    (&output_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("output section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        } else {
+            assert!(child.element().kind() == SyntaxKind::BoundDeclNode);
+        }
+        (&child).write(stream);
+    }
+    stream.end_line();
+}
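Note: `format_output_section` assumes every child between the braces is a `BoundDeclNode` (see the `assert!`), writing each declaration on its own indented line. A hypothetical sketch of the output:

    version 1.0

    workflow w {
        output {
            String greeting = "hello"
        }
    }
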
diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs
new file mode 100644
index 000000000..ca9a470b3
--- /dev/null
+++ b/wdl-format/src/v1/expr.rs
@@ -0,0 +1,313 @@
+//! Formatting of WDL v1.x expression elements.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`LiteralString`](wdl_ast::v1::LiteralString).
+pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("literal string children") {
+        match child.element().kind() {
+            SyntaxKind::SingleQuote => {
+                stream.push_literal_in_place_of_token(
+                    child.element().as_token().expect("token"),
+                    "\"".to_owned(),
+                );
+            }
+            _ => {
+                (&child).write(stream);
+            }
+        }
+    }
+}
+
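Note: `push_literal_in_place_of_token` above substitutes a double quote for each `SingleQuote` token, so single- and double-quoted strings normalize to one style. A hypothetical `String s = 'hello'` would format as:

    version 1.0

    workflow w {
        String s = "hello"
    }
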
+/// Formats a [`LiteralBoolean`](wdl_ast::v1::LiteralBoolean).
+pub fn format_literal_boolean(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal boolean children");
+    let bool = children.next().expect("literal boolean token");
+    (&bool).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`NegationExpr`](wdl_ast::v1::NegationExpr).
+pub fn format_negation_expr(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("negation expr children");
+    let minus = children.next().expect("negation expr minus");
+    assert!(minus.element().kind() == SyntaxKind::Minus);
+    (&minus).write(stream);
+
+    let expr = children.next().expect("negation expr expr");
+    (&expr).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralInteger`](wdl_ast::v1::LiteralInteger).
+pub fn format_literal_integer(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("literal integer children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`LiteralFloat`](wdl_ast::v1::LiteralFloat).
+pub fn format_literal_float(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("literal float children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`NameReference`](wdl_ast::v1::NameRef).
+pub fn format_name_ref(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("name ref children");
+    let name = children.next().expect("name ref name");
+    (&name).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralArray`](wdl_ast::v1::LiteralArray).
+pub fn format_literal_array(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal array children");
+
+    let open_bracket = children.next().expect("literal array open bracket");
+    assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket);
+    (&open_bracket).write(stream);
+    stream.increment_indent();
+
+    let mut close_bracket = None;
+    let mut commas = Vec::new();
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::CloseBracket {
+                close_bracket = Some(child.to_owned());
+                false
+            } else if child.element().kind() == SyntaxKind::Comma {
+                commas.push(child.to_owned());
+                false
+            } else {
+                true
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+            stream.end_line();
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+            stream.end_line();
+        }
+    }
+
+    stream.decrement_indent();
+    (&close_bracket.expect("literal array close bracket")).write(stream);
+}
+
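Note: literal arrays get the same treatment as metadata arrays: one element per line, indented, with a trailing comma synthesized when absent. A hypothetical `Array[Int] xs = [1, 2]` would format as:

    version 1.0

    workflow w {
        Array[Int] xs = [
            1,
            2,
        ]
    }
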
+/// Formats an [`AccessExpr`](wdl_ast::v1::AccessExpr).
+pub fn format_access_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("access expr children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`CallExpr`](wdl_ast::v1::CallExpr).
+pub fn format_call_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call expr children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`IndexExpr`](wdl_ast::v1::IndexExpr).
+pub fn format_index_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("index expr children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats an [`AdditionExpr`](wdl_ast::v1::AdditionExpr).
+pub fn format_addition_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("addition expr children") {
+        let kind = child.element().kind();
+        if kind == SyntaxKind::Plus {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if kind == SyntaxKind::Plus {
+            stream.end_word();
+        }
+    }
+}
+
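Note: calling `end_word()` on both sides of the operator token collapses any surrounding whitespace to a single space, so a hypothetical `Int n = 1+2*3` would format as:

    version 1.0

    workflow w {
        Int n = 1 + 2 * 3
    }
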
+/// Formats a [`MultiplicationExpr`](wdl_ast::v1::MultiplicationExpr).
+pub fn format_multiplication_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("multiplication expr children") {
+        let kind = child.element().kind();
+        if kind == SyntaxKind::Asterisk {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if kind == SyntaxKind::Asterisk {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LogicalAndExpr`](wdl_ast::v1::LogicalAndExpr).
+pub fn format_logical_and_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("logical and expr children") {
+        let kind = child.element().kind();
+        if kind == SyntaxKind::LogicalAnd {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if kind == SyntaxKind::LogicalAnd {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LogicalNotExpr`](wdl_ast::v1::LogicalNotExpr).
+pub fn format_logical_not_expr(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("logical not expr children");
+    let not = children.next().expect("logical not expr not");
+    assert!(not.element().kind() == SyntaxKind::Exclamation);
+    (¬).write(stream);
+
+    let expr = children.next().expect("logical not expr expr");
+    (&expr).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LogicalOrExpr`](wdl_ast::v1::LogicalOrExpr).
+pub fn format_logical_or_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("logical or expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::LogicalOr;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`EqualityExpr`](wdl_ast::v1::EqualityExpr).
+pub fn format_equality_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("equality expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Equal;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`InequalityExpr`](wdl_ast::v1::InequalityExpr).
+pub fn format_inequality_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("inequality expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::NotEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LessExpr`](wdl_ast::v1::LessExpr).
+pub fn format_less_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("less expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Less;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LessEqualExpr`](wdl_ast::v1::LessEqualExpr).
+pub fn format_less_equal_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("less equal expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::LessEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`GreaterExpr`](wdl_ast::v1::GreaterExpr).
+pub fn format_greater_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("greater expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Greater;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`GreaterEqualExpr`](wdl_ast::v1::GreaterEqualExpr).
+pub fn format_greater_equal_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("greater equal expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::GreaterEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`ParenthesizedExpr`](wdl_ast::v1::ParenthesizedExpr).
+pub fn format_parenthesized_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("parenthesized expr children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats an [`IfExpr`](wdl_ast::v1::IfExpr).
+pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("if expr children");
+
+    let if_keyword = children.next().expect("if keyword");
+    assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword);
+    (&if_keyword).write(stream);
+    stream.end_word();
+
+    for child in children {
+        let kind = child.element().kind();
+        let should_end_word = kind == SyntaxKind::ThenKeyword || kind == SyntaxKind::ElseKeyword;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
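Note: `format_if_expr` spaces the `if`, `then`, and `else` keywords and delegates the branch expressions to their own formatters. A hypothetical sketch:

    version 1.0

    workflow w {
        Int threads = if true then 4 else 1
    }
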
diff --git a/wdl-format/src/v1/struct.rs b/wdl-format/src/v1/struct.rs
index faeb8e8e0..eb2345363 100644
--- a/wdl-format/src/v1/struct.rs
+++ b/wdl-format/src/v1/struct.rs
@@ -73,3 +73,70 @@ pub fn format_struct_definition(element: &FormatElement, stream: &mut TokenStrea
     (&close_brace.expect("struct definition close brace")).write(stream);
     stream.end_line();
 }
+
+/// Formats a [`LiteralStructItem`](wdl_ast::v1::LiteralStructItem).
+pub fn format_literal_struct_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal struct item children");
+
+    let key = children.next().expect("literal struct item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("literal struct item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    for child in children {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`LiteralStruct`](wdl_ast::v1::LiteralStruct).
+pub fn format_literal_struct(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal struct children");
+
+    let name = children.next().expect("literal struct name");
+    assert!(name.element().kind() == SyntaxKind::Ident);
+    (&name).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("literal struct open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.end_line();
+    stream.increment_indent();
+
+    let mut close_brace = None;
+    let mut commas = Vec::new();
+    let members = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::LiteralStructItemNode {
+                true
+            } else if child.element().kind() == SyntaxKind::Comma {
+                commas.push(child.to_owned());
+                false
+            } else {
+                assert!(child.element().kind() == SyntaxKind::CloseBrace);
+                close_brace = Some(child.to_owned());
+                false
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut commas = commas.iter();
+    for member in members {
+        (&member).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+            stream.end_line();
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+            stream.end_line();
+        }
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal struct close brace")).write(stream);
+    stream.end_line();
+}
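Note: struct literals are expanded one member per line with synthesized trailing commas, mirroring the array and metadata helpers. A hypothetical sketch (written as `version 1.1`, where typed struct literals are available):

    version 1.1

    struct Person {
        String name
    }

    workflow w {
        Person p = Person {
            name: "Jane",
        }
    }
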
diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs
index 08686309d..807635818 100644
--- a/wdl-format/src/v1/workflow.rs
+++ b/wdl-format/src/v1/workflow.rs
@@ -9,6 +9,87 @@ use crate::TokenStream;
 use crate::Writable as _;
 use crate::element::FormatElement;
 
+/// Formats a [`ConditionalStatement`](wdl_ast::v1::ConditionalStatement).
+pub fn format_conditional_statement(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("conditional statement children");
+
+    let if_keyword = children.next().expect("if keyword");
+    assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword);
+    (&if_keyword).write(stream);
+    stream.end_word();
+
+    let open_paren = children.next().expect("open paren");
+    assert!(open_paren.element().kind() == SyntaxKind::OpenParen);
+    (&open_paren).write(stream);
+
+    for child in children.by_ref() {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::CloseParen {
+            stream.end_word();
+            break;
+        }
+    }
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        }
+        (&child).write(stream);
+    }
+    stream.end_line();
+}
+
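Note: the conditional helper keeps the parenthesized condition on the `if` line, separates the closing paren from the open brace with a space, and indents the body one level. A hypothetical sketch:

    version 1.0

    workflow w {
        Boolean extra = true

        if (extra) {
            Int doubled = 2 * 2
        }
    }
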
+/// Formats a [`ScatterStatement`](wdl_ast::v1::ScatterStatement).
+pub fn format_scatter_statement(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("scatter statement children");
+
+    let scatter_keyword = children.next().expect("scatter keyword");
+    assert!(scatter_keyword.element().kind() == SyntaxKind::ScatterKeyword);
+    (&scatter_keyword).write(stream);
+    stream.end_word();
+
+    let open_paren = children.next().expect("open paren");
+    assert!(open_paren.element().kind() == SyntaxKind::OpenParen);
+    (&open_paren).write(stream);
+
+    let variable = children.next().expect("scatter variable");
+    assert!(variable.element().kind() == SyntaxKind::Ident);
+    (&variable).write(stream);
+    stream.end_word();
+
+    let in_keyword = children.next().expect("in keyword");
+    assert!(in_keyword.element().kind() == SyntaxKind::InKeyword);
+    (&in_keyword).write(stream);
+    stream.end_word();
+
+    for child in children.by_ref() {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::CloseParen {
+            stream.end_word();
+            break;
+        }
+    }
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.end_line();
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        }
+        (&child).write(stream);
+    }
+    stream.end_line();
+}
+
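Note: `format_scatter_statement` checks the `scatter (x in expr)` header token by token before indenting the body, so malformed headers fail fast in the assertions. A hypothetical sketch of its output:

    version 1.0

    workflow w {
        Array[Int] xs = [
            1,
        ]

        scatter (x in xs) {
            Int doubled = x * 2
        }
    }
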
 /// Formats a [`WorkflowDefinition`](wdl_ast::v1::WorkflowDefinition).
 pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) {
     let mut children = element.children().expect("workflow definition children");
@@ -26,7 +107,6 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr
     let open_brace = children.next().expect("open brace");
     assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
     (&open_brace).write(stream);
-    stream.end_line();
     stream.increment_indent();
 
     let mut meta = None;
@@ -56,6 +136,9 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr
             SyntaxKind::ConditionalStatementNode => {
                 body.push(child.clone());
             }
+            SyntaxKind::ScatterStatementNode => {
+                body.push(child.clone());
+            }
             SyntaxKind::OutputSectionNode => {
                 output = Some(child.clone());
             }
diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs
index 4018c44f1..c8eeb4953 100644
--- a/wdl-format/src/v1/workflow/call.rs
+++ b/wdl-format/src/v1/workflow/call.rs
@@ -7,27 +7,94 @@ use crate::TokenStream;
 use crate::Writable as _;
 use crate::element::FormatElement;
 
+/// Formats a [`CallTarget`](wdl_ast::v1::CallTarget).
+pub fn format_call_target(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call target children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`CallAlias`](wdl_ast::v1::CallAlias).
+pub fn format_call_alias(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call alias children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+}
+
+/// Formats a [`CallAfter`](wdl_ast::v1::CallAfter).
+pub fn format_call_after(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call after children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+}
+
+/// Formats a [`CallInputItem`](wdl_ast::v1::CallInputItem).
+pub fn format_call_input_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("call input item children");
+
+    let name = children.next().expect("call input item name");
+    (&name).write(stream);
+    stream.end_word();
+
+    let equals = children.next();
+    if let Some(equals) = equals {
+        (&equals).write(stream);
+        stream.end_word();
+
+        let value = children.next().expect("call input item value");
+        (&value).write(stream);
+    }
+}
+
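Note: the call-statement logic below keeps a call with exactly one input on a single line and expands calls with multiple inputs one per line with trailing commas; both shapes appear in the seaseq fixture, e.g.:

    call bowtie.index as bowtie_idx { input: reference = reference }

    call sra.fastqdump { input:
        sra_id = eachsra,
        cloud = false,
    }
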
 /// Formats a [`CallStatement`](wdl_ast::v1::CallStatement).
 pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream) {
-    for child in element.children().expect("call statement children") {
+    let mut children = element.children().expect("call statement children");
+
+    let call_keyword = children.next().expect("call keyword");
+    assert!(call_keyword.element().kind() == SyntaxKind::CallKeyword);
+    (&call_keyword).write(stream);
+    stream.end_word();
+
+    let target = children.next().expect("call target");
+    (&target).write(stream);
+    stream.end_word();
+
+    let mut alias = None;
+    let mut afters = Vec::new();
+    let mut open_brace = None;
+    let mut input_keyword = None;
+    let mut colon = None;
+    let mut inputs = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
         match child.element().kind() {
-            SyntaxKind::CallKeyword => {
-                (&child).write(stream);
-                stream.end_word();
+            SyntaxKind::CallAliasNode => {
+                alias = Some(child.clone());
             }
-            SyntaxKind::CallTargetNode => {
-                (&child).write(stream);
-                stream.end_word();
+            SyntaxKind::CallAfterNode => {
+                afters.push(child.clone());
             }
             SyntaxKind::OpenBrace => {
-                (&child).write(stream);
-                stream.end_line();
-                stream.increment_indent();
+                open_brace = Some(child.clone());
+            }
+            SyntaxKind::InputKeyword => {
+                input_keyword = Some(child.clone());
+            }
+            SyntaxKind::Colon => {
+                colon = Some(child.clone());
+            }
+            SyntaxKind::CallInputItemNode => {
+                inputs.push(child.clone());
+            }
+            SyntaxKind::Comma => {
+                commas.push(child.clone());
             }
             SyntaxKind::CloseBrace => {
-                stream.decrement_indent();
-                (&child).write(stream);
-                stream.end_line();
+                close_brace = Some(child.clone());
             }
             _ => {
                 unreachable!(
@@ -37,11 +104,55 @@ pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream
 ) {
-    for child in element.children().expect("call target children") {
-        (&child).write(stream);
+    if let Some(alias) = alias {
+        (&alias).write(stream);
+        stream.end_word();
+    }
+
+    for after in afters {
+        (&after).write(stream);
+        stream.end_word();
+    }
+
+    if let Some(open_brace) = open_brace {
+        (&open_brace).write(stream);
+        stream.end_word();
+
+        if let Some(input_keyword) = input_keyword {
+            (&input_keyword).write(stream);
+            (&colon.expect("colon")).write(stream);
+            stream.end_word();
+        }
+
+        // TODO: Make this check smarter in case a single input spans multiple lines or
+        // is interrupted
+        let single_line = inputs.len() == 1;
+        if !single_line {
+            stream.increment_indent();
+        }
+
+        let mut commas = commas.iter();
+        for input in inputs {
+            (&input).write(stream);
+
+            if let Some(comma) = commas.next() {
+                (comma).write(stream);
+            } else if !single_line {
+                stream.push_literal(",".to_string(), SyntaxKind::Comma);
+            }
+
+            if !single_line {
+                stream.end_line();
+            }
+        }
+
+        if !single_line {
+            stream.decrement_indent();
+        } else {
+            stream.end_word();
+        }
+        (&close_brace.expect("close brace")).write(stream);
+        stream.end_line();
+    }
 }
diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl
new file mode 100644
index 000000000..05e82bd42
--- /dev/null
+++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl
@@ -0,0 +1,939 @@
+version 1.0
+
+import "workflows/tasks/bedtools.wdl"
+import "workflows/tasks/bowtie.wdl"
+import "workflows/tasks/fastqc.wdl"
+import "workflows/tasks/macs.wdl"
+import "workflows/tasks/rose.wdl"
+import "workflows/tasks/runspp.wdl"
+import "workflows/tasks/samtools.wdl"
+import "workflows/tasks/seaseq_util.wdl" as util
+import "workflows/tasks/sicer.wdl"
+import "workflows/tasks/sortbed.wdl"
+import "workflows/tasks/sratoolkit.wdl" as sra
+import "workflows/workflows/bamtogff.wdl"
+import "workflows/workflows/mapping.wdl"
+import "workflows/workflows/motifs.wdl"
+import "workflows/workflows/visualization.wdl" as viz
+
+workflow seaseq {
+
+    meta {
+        title: "SEAseq Analysis"
+        summary: "Single-End Antibody Sequencing (SEAseq) Pipeline"
+        description: "A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis."
+        version: "2.0.0"
+        details: {
+            citation: "https://doi.org/10.1186/s12859-022-04588-z",
+            contactEmail: "modupeore.adetunji@stjude.org",
+            contactOrg: "St Jude Children's Research Hospital",
+            contactUrl: "",
+            upstreamLicenses: "MIT",
+            upstreamUrl: "https://github.com/stjude/seaseq",
+            whatsNew: [
+                {
+                    version: "2.0",
+                    changes: [
+                        "version of case/sample only",
+                        "single-end sequencing with input/control sequencing data",
+                        "Initial release",
+                    ],
+                },
+            ],
+        }
+        parameter_group: {
+            reference_genome: {
+                title: "Reference genome",
+                description: "Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .",
+                help: "Input reference genome files as defined.
If some genome data are missing then analyses using such data will be skipped.", + }, + input_genomic_data: { + title: "Input FASTQ data", + description: "Genomic input files for experiment.", + help: "Input one or more sample data and/or SRA identifiers.", + }, + analysis_parameter: { + title: "Analysis parameter", + description: "Analysis settings needed for experiment.", + help: "Analysis settings; such output analysis file name.", + }, + } + } + + parameter_meta { + reference: { + description: "Reference FASTA file", + group: "reference_genome", + patterns: [ + "*.fa", + "*.fasta", + "*.fa.gz", + "*.fasta.gz", + ], + } + blacklist: { + description: "Blacklist file in BED format", + group: "reference_genome", + help: "If defined, blacklist regions listed are excluded after reference alignment.", + patterns: [ + "*.bed", + "*.bed.gz", + ], + } + gtf: { + description: "gene annotation file (.gtf)", + group: "reference_genome", + help: "Input gene annotation file from RefSeq or GENCODE (.gtf).", + patterns: [ + "*.gtf", + "*.gtf.gz", + "*.gff", + "*.gff.gz", + "*.gff3", + "*.gff3.gz", + ], + } + bowtie_index: { + description: "bowtie v1 index files (*.ebwt)", + group: "reference_genome", + help: "If not defined, bowtie v1 index files are generated, will take a longer compute time.", + patterns: [ + "*.ebwt", + ], + } + motif_databases: { + description: "One or more of the MEME suite motif databases (*.meme)", + group: "reference_genome", + help: "Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).", + patterns: [ + "*.meme", + ], + } + sample_sraid: { + description: "One or more sample SRA (Sequence Read Archive) run identifiers", + group: "input_genomic_data", + help: "Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).", + example: "SRR12345678", + } + sample_fastq: { + description: "One or more sample FASTQs", + group: "input_genomic_data", + help: "Upload zipped FASTQ files.", + patterns: [ + "*.fq.gz", + "*.fastq.gz", + ], + } + results_name: { + description: "Experiment results custom name", + group: "analysis_parameter", + help: "Input preferred analysis results name (recommended if multiple FASTQs are provided).", + example: "AllMerge_mapped", + } + run_motifs: { + description: "Perform Motif Analysis", + group: "analysis_parameter", + help: "Setting this means Motif Discovery and Enrichment analysis will be performed.", + example: true, + } + } + + input { + # group: reference_genome + File reference + File? spikein_reference + File? blacklist + File gtf + Array[File]? bowtie_index + Array[File]? spikein_bowtie_index + Array[File]? motif_databases + + # group: input_genomic_data + Array[String]? sample_sraid + Array[File]? sample_fastq + + # group: analysis_parameter + String? 
results_name + Boolean run_motifs = true + + } + + String pipeline_ver = "v2.0.0" + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 1 ----------- ### + ### ------ Pre-process Analysis Files ------ ### + ### ---------------------------------------- ### + + # Process SRRs + if (defined(sample_sraid)) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [ + 1, + ] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([ + sample_sraid, + string_sra, + ]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { input: + sra_id = eachsra, + cloud = false, + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if (!defined(bowtie_index)) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { input: reference = reference } + } + #2. Make sure indexes are six else build indexes + if (defined(bowtie_index)) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [ + 1, + ] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([ + bowtie_index, + string_bowtie_index, + ]) + if (length(int_bowtie_index) != 6) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { input: reference = reference } + } + } + Array[File] actual_bowtie_index = select_first([ + bowtie_idx_2.bowtie_indexes, + bowtie_idx.bowtie_indexes, + bowtie_index, + ]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [ + 1, + ] + if (!defined(spikein_bowtie_index) && defined(spikein_reference)) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { input: reference = select_first([ + spikein_reference, + string_spikein, + ]) } + } + + #4. 
Make sure indexes are six else build indexes for Spike-in DNA + if (defined(spikein_bowtie_index)) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([ + spikein_bowtie_index, + string_spikein_buffer, + ]) + if (length(int_spikein_bowtie_index) != 6) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { input: reference = select_first([ + spikein_reference, + string_spikein, + ]) } + } + } + Array[File] actual_spikein_bowtie_index = select_first([ + spikein_bowtie_idx_2.bowtie_indexes, + spikein_bowtie_idx.bowtie_indexes, + spikein_bowtie_index, + string_spikein_buffer, + ]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input: reference = reference } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input: reference = reference } + + # Process FASTQs + if (defined(sample_fastq)) { + + Array[String] string_fastq = [ + 1, + ] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([ + sample_fastq, + string_fastq, + ]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([ + sample_srafile, + sample_fastqfile, + ])) + + ### ------------------------------------------------- ### + ### ---------------- S E C T I O N 1 ---------------- ### + ### ----------- B: remove Spike-IN reads ------------ ### + ### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + + if (defined(spikein_bowtie_index) || defined(spikein_reference)) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { input: + inputfile = eachfastq, + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC", + } + call util.basicfastqstats as spikein_indv_bfs { input: + fastqfile = eachfastq, + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", + } + call bowtie.spikein_SE as spikein_indv_map { input: + fastqfile = eachfastq, + index_files = actual_spikein_bowtie_index, + metricsfile = spikein_indv_bfs.metrics_out, + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([ + spikein_fastqfiles, + original_fastqfiles, + ]) + + ### ------------------------------------------------- ### + ### ---------------- S E C T I O N 2 ---------------- ### + ### ---- A: analysis if multiple FASTQs provided ---- ### + ### ------------------------------------------------- ### + + if (multi_fastq) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove 
Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { input: + inputfile = eachfastq, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call util.basicfastqstats as indv_bfs { input: + fastqfile = eachfastq, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + + call mapping.mapping as indv_mapping { input: + fastqfile = eachfastq, + index_files = actual_bowtie_index, + metricsfile = indv_bfs.metrics_out, + blacklist = blacklist, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/BAM_files", + } + + call fastqc.fastqc as indv_bamfqc { input: + inputfile = indv_mapping.sorted_bam, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call runspp.runspp as indv_runspp { input: bamfile = select_first([ + indv_mapping.bklist_bam, + indv_mapping.sorted_bam, + ]) } + + call bedtools.bamtobed as indv_bamtobed { input: bamfile = select_first([ + indv_mapping.bklist_bam, + indv_mapping.sorted_bam, + ]) } + + call util.evalstats as indv_summarystats { input: + fastq_type = "SEAseq Sample FASTQ", + bambed = indv_bamtobed.bedfile, + sppfile = indv_runspp.spp_out, + fastqczip = indv_fastqc.zipfile, + bamflag = indv_mapping.bam_stats, + rmdupflag = indv_mapping.mkdup_stats, + bkflag = indv_mapping.bklist_stats, + fastqmetrics = indv_bfs.metrics_out, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { input: + htmlfiles = indv_summarystats.xhtml, + txtfiles = indv_summarystats.textfile, + default_location = "SAMPLE", + outputfile = "AllMapped_" + length(fastqfiles) + "_seaseq-summary-stats.html", + } + + call samtools.mergebam { input: + bamfiles = indv_mapping.sorted_bam, + metricsfiles = indv_bfs.metrics_out, + default_location = if defined(results_name) then results_name + "/BAM_files" else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files", + outputfile = if defined(results_name) then results_name + ".sorted.bam" else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam", + } + + call fastqc.fastqc as mergebamfqc { input: + inputfile = mergebam.mergebam, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/QC/FastQC", + } + + call samtools.indexstats as mergeindexstats { input: + bamfile = mergebam.mergebam, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/BAM_files", + } + + if (defined(blacklist)) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([ + blacklist, + string_blacklist, + ]) + call bedtools.intersect as merge_rmblklist { input: + fileA = mergebam.mergebam, + fileB = blacklist_file, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/BAM_files", + nooverlap = true, + } + call samtools.indexstats as merge_bklist { input: + bamfile = merge_rmblklist.intersect_out, + default_location = sub(basename(mergebam.mergebam), 
".sorted.b.*$", "") + "/BAM_files", + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([ + merge_rmblklist.intersect_out, + mergebam.mergebam, + ]) + + call samtools.markdup as merge_markdup { input: + bamfile = mergebam_afterbklist, + default_location = sub(basename(mergebam_afterbklist), ".sorted.b.*$", "") + "/BAM_files", + } + + call samtools.indexstats as merge_mkdup { input: + bamfile = merge_markdup.mkdupbam, + default_location = sub(basename(mergebam_afterbklist), ".sorted.b.*$", "") + "/BAM_files", + } + } # end if length(fastqfiles) > 1: multi_fastq + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 2 ----------- ### + ### -- B: analysis if one FASTQ provided --- ### + ### ---------------------------------------- ### + + # if only one fastqfile is provided + if (one_fastq) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { input: + inputfile = fastqfiles[0], + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call util.basicfastqstats as uno_bfs { input: + fastqfile = fastqfiles[0], + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + + call mapping.mapping { input: + fastqfile = fastqfiles[0], + index_files = actual_bowtie_index, + metricsfile = uno_bfs.metrics_out, + blacklist = blacklist, + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/BAM_files", + } + + call fastqc.fastqc as uno_bamfqc { input: + inputfile = mapping.sorted_bam, + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call runspp.runspp as uno_runspp { input: bamfile = select_first([ + mapping.bklist_bam, + mapping.sorted_bam, + ]) } + + call bedtools.bamtobed as uno_bamtobed { input: bamfile = select_first([ + mapping.bklist_bam, + mapping.sorted_bam, + ]) } + } # end if length(fastqfiles) == 1: one_fastq + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 3 ----------- ### + ### ----------- ChIP-seq analysis ---------- ### + ### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate correct files for downstream analysis + File sample_bam = select_first([ + mergebam_afterbklist, + mapping.bklist_bam, + mapping.sorted_bam, + ]) + + call macs.macs { input: + bamfile = sample_bam, + pvalue = "1e-9", + keep_dup = "auto", + egs = egs.genomesize, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-auto", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-auto", + } + + call util.addreadme { input: default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS" } + + call macs.macs as all { input: + bamfile = sample_bam, + pvalue = "1e-9", + keep_dup = "all", + egs = egs.genomesize, + 
default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-all", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-all", + } + + call macs.macs as nomodel { input: + bamfile = sample_bam, + nomodel = true, + egs = egs.genomesize, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-nm", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_nm", + } + + call bamtogff.bamtogff { input: + gtffile = gtf, + chromsizes = samtools_faidx.chromsizes, + bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]), + bamindex = select_first([ + merge_mkdup.indexbam, + mapping.mkdup_index, + ]), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/BAM_Density", + } + + call bedtools.bamtobed as forsicerbed { input: bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]) } + + call sicer.sicer { input: + bedfile = forsicerbed.bedfile, + chromsizes = samtools_faidx.chromsizes, + genome_fraction = egs.genomefraction, + fragmentlength = select_first([ + uno_bfs.readlength, + mergebam.avg_readlength, + ]), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/BROAD_peaks", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/BROAD_peaks", + } + + call rose.rose { input: + gtffile = gtf, + bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]), + bamindex = select_first([ + merge_mkdup.indexbam, + mapping.mkdup_index, + ]), + bedfile_auto = macs.peakbedfile, + bedfile_all = all.peakbedfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/STITCHED_peaks", + } + + call runspp.runspp { input: bamfile = sample_bam } + + call util.peaksanno { input: + gtffile = gtf, + bedfile = macs.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = macs.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as all_peaksanno { input: + gtffile = gtf, + bedfile = all.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = all.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as nomodel_peaksanno { input: + gtffile = gtf, + bedfile = nomodel.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = nomodel.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as sicer_peaksanno { input: + gtffile = gtf, + bedfile = sicer.scoreisland, + chromsizes = samtools_faidx.chromsizes, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/BROAD_peaks", + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { input: + reference = reference, + reference_index = samtools_faidx.faidx_file, + bedfile = macs.peakbedfile, + motif_databases = motif_databases, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") 
+ "/MOTIFS", + } + + call util.flankbed { input: + bedfile = macs.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/MOTIFS", + } + + call motifs.motifs as flank { input: + reference = reference, + reference_index = samtools_faidx.faidx_file, + bedfile = flankbed.flankbedfile, + motif_databases = motif_databases, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/MOTIFS", + } + } + + call viz.visualization { input: + wigfile = macs.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = macs.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as vizall { input: + wigfile = all.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = all.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as viznomodel { input: + wigfile = nomodel.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = nomodel.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as vizsicer { input: + wigfile = sicer.wigfile, + chromsizes = samtools_faidx.chromsizes, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/BROAD_peaks", + } + + call bedtools.bamtobed as finalbed { input: bamfile = sample_bam } + + call sortbed.sortbed { input: bedfile = finalbed.bedfile } + + call bedtools.intersect { input: + fileA = macs.peakbedfile, + fileB = sortbed.sortbed_out, + countoverlap = true, + sorted = true, + } + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 4 ----------- ### + ### ---------- Summary Statistics ---------- ### + ### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if (one_fastq) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type = "SEAseq Sample FASTQ", + bambed = finalbed.bedfile, + sppfile = runspp.spp_out, + fastqczip = select_first([ + uno_bamfqc.zipfile, + string_qual, + ]), + bamflag = mapping.bam_stats, + rmdupflag = mapping.mkdup_stats, + bkflag = mapping.bklist_stats, + fastqmetrics = uno_bfs.metrics_out, + countsfile = intersect.intersect_out, + peaksxls = macs.peakxlsfile, + enhancers = rose.enhancers, + superenhancers = rose.super_enhancers, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/QC/SummaryStats", + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + overallqc_html = uno_summarystats.xhtml, + overallqc_txt = uno_summarystats.textfile, + } + } # end if one_fastq + + if (multi_fastq) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type = "SEAseq Comprehensive", + bambed = finalbed.bedfile, + sppfile = runspp.spp_out, + fastqczip = select_first([ + mergebamfqc.zipfile, + string_qual, + ]), + bamflag = mergeindexstats.flagstats, + rmdupflag = merge_mkdup.flagstats, + bkflag = merge_bklist.flagstats, + countsfile = intersect.intersect_out, + peaksxls = 
macs.peakxlsfile, + enhancers = rose.enhancers, + superenhancers = rose.super_enhancers, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/QC/SummaryStats", + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html = mergehtml.xhtml, + overallqc_html = merge_summarystats.xhtml, + sampleqc_txt = mergehtml.mergetxt, + overallqc_txt = merge_summarystats.textfile, + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? g_to_e_enhancers = rose.g_to_e_enhancers + File? 
e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + + File? s_uno_statsfile = uno_summarystats.statsfile + File? 
s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + + File? summaryhtml = select_first([ + uno_overallsummary.summaryhtml, + merge_overallsummary.summaryhtml, + ]) + File? summarytxt = select_first([ + uno_overallsummary.summarytxt, + merge_overallsummary.summarytxt, + ]) + } + +} From 2cef6f81703b58372f8d370b2ebebb0dad5f005e Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Thu, 10 Oct 2024 10:30:21 -0400 Subject: [PATCH 16/60] [WIP]tests: format each current test --- wdl-format/src/lib.rs | 34 +- wdl-format/src/token/post.rs | 2 +- wdl-format/src/v1.rs | 21 + wdl-format/src/v1/expr.rs | 185 +- wdl-format/src/v1/task.rs | 261 +- wdl-format/src/v1/workflow.rs | 7 + wdl-format/src/v1/workflow/call.rs | 7 +- .../source.formatted.wdl | 3626 +++++++++++++++++ .../clays_complex_script/source.formatted.wdl | 188 + .../source.formatted.wdl | 123 + .../interrupt_example/source.formatted.wdl | 1 - .../format/seaseq-case/source.formatted.wdl | 1 - 12 files changed, 4418 insertions(+), 38 deletions(-) create mode 100644 wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl create mode 100644 wdl-format/tests/format/clays_complex_script/source.formatted.wdl create mode 100644 wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index e80d6da74..3b70a4c08 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -64,7 +64,7 @@ impl Writable for &FormatElement { v1::workflow::call::format_call_statement(self, stream) } AstNode::CallTarget(_) => v1::workflow::call::format_call_target(self, stream), - AstNode::CommandSection(_) => todo!(), + AstNode::CommandSection(_) => v1::task::format_command_section(self, stream), AstNode::ConditionalStatement(_) => { v1::workflow::format_conditional_statement(self, stream) } @@ -90,12 +90,12 @@ impl Writable for &FormatElement { AstNode::LiteralInput(_) => todo!(), AstNode::LiteralInputItem(_) => todo!(), AstNode::LiteralInteger(_) => v1::expr::format_literal_integer(self, stream), - AstNode::LiteralMap(_) => todo!(), - AstNode::LiteralMapItem(_) => todo!(), + AstNode::LiteralMap(_) => v1::expr::format_literal_map(self, stream), + AstNode::LiteralMapItem(_) => v1::expr::format_literal_map_item(self, stream), AstNode::LiteralNone(_) => todo!(), AstNode::LiteralNull(_) => v1::format_literal_null(self, stream), - AstNode::LiteralObject(_) => todo!(), - AstNode::LiteralObjectItem(_) => todo!(), + AstNode::LiteralObject(_) => v1::expr::format_literal_object(self, stream), + AstNode::LiteralObjectItem(_) => v1::expr::format_literal_object_item(self, stream), AstNode::LiteralOutput(_) => todo!(), AstNode::LiteralOutputItem(_) => todo!(), AstNode::LiteralPair(_) => todo!(), @@ -107,7 +107,7 @@ impl Writable for &FormatElement { AstNode::LogicalAndExpr(_) => v1::expr::format_logical_and_expr(self, stream), AstNode::LogicalNotExpr(_) => v1::expr::format_logical_not_expr(self, stream), AstNode::LogicalOrExpr(_) => v1::expr::format_logical_or_expr(self, stream), - AstNode::MapType(_) => todo!(), + AstNode::MapType(_) => v1::format_map_type(self, stream), AstNode::MetadataArray(_) => v1::format_metadata_array(self, stream), AstNode::MetadataObject(_) => v1::format_metadata_object(self, stream), AstNode::MetadataObjectItem(_) => v1::format_metadata_object_item(self, stream), @@ -119,29 +119,31 
@@ impl Writable for &FormatElement { AstNode::NameRef(_) => v1::expr::format_name_ref(self, stream), AstNode::NegationExpr(_) => v1::expr::format_negation_expr(self, stream), AstNode::OutputSection(_) => v1::format_output_section(self, stream), - AstNode::PairType(_) => todo!(), - AstNode::ObjectType(_) => todo!(), + AstNode::PairType(_) => v1::format_pair_type(self, stream), + AstNode::ObjectType(_) => v1::format_object_type(self, stream), AstNode::ParameterMetadataSection(_) => { v1::format_parameter_metadata_section(self, stream) } AstNode::ParenthesizedExpr(_) => v1::expr::format_parenthesized_expr(self, stream), - AstNode::Placeholder(_) => todo!(), + AstNode::Placeholder(_) => v1::expr::format_placeholder(self, stream), AstNode::PrimitiveType(_) => v1::format_primitive_type(self, stream), - AstNode::RequirementsItem(_) => todo!(), - AstNode::RequirementsSection(_) => todo!(), - AstNode::RuntimeItem(_) => todo!(), - AstNode::RuntimeSection(_) => todo!(), + AstNode::RequirementsItem(_) => v1::task::format_requirements_item(self, stream), + AstNode::RequirementsSection(_) => { + v1::task::format_requirements_section(self, stream) + } + AstNode::RuntimeItem(_) => v1::task::format_runtime_item(self, stream), + AstNode::RuntimeSection(_) => v1::task::format_runtime_section(self, stream), AstNode::ScatterStatement(_) => { v1::workflow::format_scatter_statement(self, stream) } - AstNode::SepOption(_) => todo!(), + AstNode::SepOption(_) => v1::expr::format_sep_option(self, stream), AstNode::StructDefinition(_) => { v1::r#struct::format_struct_definition(self, stream) } AstNode::SubtractionExpr(_) => todo!(), AstNode::TaskDefinition(_) => v1::task::format_task_definition(self, stream), - AstNode::TaskHintsItem(_) => todo!(), - AstNode::TaskHintsSection(_) => todo!(), + AstNode::TaskHintsItem(_) => v1::task::format_task_hints_item(self, stream), + AstNode::TaskHintsSection(_) => v1::task::format_task_hints_section(self, stream), AstNode::TrueFalseOption(_) => todo!(), AstNode::TypeRef(_) => v1::format_type_ref(self, stream), AstNode::UnboundDecl(_) => v1::format_unbound_decl(self, stream), diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index c69352d5a..aaec2dbc8 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -163,7 +163,7 @@ impl Postprocessor { Comment::Preceding(value) => { if !matches!( stream.0.last(), - Some(&PostToken::Newline) | Some(&PostToken::Indent) + Some(&PostToken::Newline) | Some(&PostToken::Indent) | None ) { self.interrupted = true; } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 1a6bca77f..89ed3cf0c 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -95,6 +95,27 @@ pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) { +/// Formats a [`MapType`](wdl_ast::v1::MapType). +pub fn format_map_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + for child in element.children().expect("map type children") { + (&child).write(stream); + } +} + +/// Formats an [`ObjectType`](wdl_ast::v1::ObjectType). +pub fn format_object_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + for child in element.children().expect("object type children") { + (&child).write(stream); + } +} + +/// Formats a [`PairType`](wdl_ast::v1::PairType). +pub fn format_pair_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + for child in element.children().expect("pair type children") { + (&child).write(stream); + } +} +
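Editor's note: with the dispatch arms and passthrough formatters above wired up, compound type declarations no longer hit a todo!(). As a quick illustration (a hypothetical WDL fragment under the new code paths, not one of the test fixtures), declarations like these can now be formatted:

    Map[String, Int] counts = {"a": 1}
    Pair[Int, String] entry = (1, "one")
    Object extra = object { id: 1 }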
/// Formats a [`TypeRef`](wdl_ast::v1::TypeRef). pub fn format_type_ref(element: &FormatElement, stream: &mut TokenStream<PreToken>) { let mut children = element.children().expect("type ref children"); diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index ca9a470b3..32a5f5057 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -7,6 +7,54 @@ use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; +/// Formats a [`SepOption`](wdl_ast::v1::SepOption). +pub fn format_sep_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("sep option children"); + + let sep_keyword = children.next().expect("sep keyword"); + assert!(sep_keyword.element().kind() == SyntaxKind::Ident); + (&sep_keyword).write(stream); + + let equals = children.next().expect("sep equals"); + assert!(equals.element().kind() == SyntaxKind::Assignment); + (&equals).write(stream); + + let sep_value = children.next().expect("sep value"); + assert!(sep_value.element().kind() == SyntaxKind::LiteralStringNode); + (&sep_value).write(stream); + stream.end_word(); + + assert!(children.next().is_none()); +} + +/// Formats a [`Placeholder`](wdl_ast::v1::Placeholder). +pub fn format_placeholder(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("placeholder children"); + + let open = children.next().expect("placeholder open"); + assert!(open.element().kind() == SyntaxKind::PlaceholderOpen); + let syntax = open.element().syntax(); + let text = syntax.as_token().expect("token").text(); + match text { + "${" => { + stream.push_literal_in_place_of_token( + open.element().as_token().expect("token"), + "~{".to_owned(), + ); + } + "~{" => { + (&open).write(stream); + } + _ => { + unreachable!("unexpected placeholder open: {:?}", text); + } + } + + for child in children { + (&child).write(stream); + } +} + /// Formats a [`LiteralString`](wdl_ast::v1::LiteralString). pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream<PreToken>) { for child in element.children().expect("literal string children") { @@ -58,7 +106,7 @@ pub fn format_literal_float(element: &FormatElement, stream: &mut TokenStream<PreToken>) { let mut children = element.children().expect("name ref children"); let name = children.next().expect("name ref name"); @@ -107,6 +155,123 @@ pub fn format_literal_array(element: &FormatElement, stream: &mut TokenStream<PreToken>) { +/// Formats a [`LiteralMapItem`](wdl_ast::v1::LiteralMapItem). +pub fn format_literal_map_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("literal map item children"); + + let key = children.next().expect("literal map item key"); + (&key).write(stream); + + let colon = children.next().expect("literal map item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("literal map item value"); + (&value).write(stream); + assert!(children.next().is_none()); +}
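Editor's note on format_placeholder/format_sep_option above: a legacy `${` placeholder opener is rewritten to `~{` via push_literal_in_place_of_token, and a `sep=` option is written with its string value followed by a single space before the expression. A hypothetical before/after sketch (illustrative names, not a fixture):

    # input
    String joined = '${sep=", " names}'
    # formatted
    String joined = '~{sep=", " names}'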
+ +/// Formats a [`LiteralMap`](wdl_ast::v1::LiteralMap). +pub fn format_literal_map(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("literal map children"); + + let open_brace = children.next().expect("literal map open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut close_brace = None; + let mut commas = Vec::new(); + let items = children + .filter(|child| { + if child.element().kind() == SyntaxKind::CloseBrace { + close_brace = Some(child.to_owned()); + false + } else if child.element().kind() == SyntaxKind::Comma { + commas.push(child.to_owned()); + false + } else { + true + } + }) + .collect::<Vec<_>>(); + + let mut commas = commas.iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (comma).write(stream); + stream.end_line(); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + stream.end_line(); + } + } + + stream.decrement_indent(); + (&close_brace.expect("literal map close brace")).write(stream); +} + +/// Formats a [`LiteralObjectItem`](wdl_ast::v1::LiteralObjectItem). +pub fn format_literal_object_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("literal object item children"); + + let key = children.next().expect("literal object item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("literal object item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("literal object item value"); + (&value).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralObject`](wdl_ast::v1::LiteralObject). +pub fn format_literal_object(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("literal object children"); + + let open_brace = children.next().expect("literal object open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut close_brace = None; + let mut commas = Vec::new(); + let members = children + .filter(|child| { + if child.element().kind() == SyntaxKind::CloseBrace { + close_brace = Some(child.to_owned()); + false + } else if child.element().kind() == SyntaxKind::Comma { + commas.push(child.to_owned()); + false + } else { + true + } + }) + .collect::<Vec<_>>(); + + let mut commas = commas.iter(); + for member in members { + (&member).write(stream); + if let Some(comma) = commas.next() { + (comma).write(stream); + stream.end_line(); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + stream.end_line(); + } + } + + stream.decrement_indent(); + (&close_brace.expect("literal object close brace")).write(stream); +} +
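Editor's note: the literal map/object formatters above put one item per line at an increased indent and synthesize a trailing comma whenever the source omits one. For example (illustrative input, not a fixture), `Map[String, Int] m = {"a": 1, "b": 2}` would come out as:

    Map[String, Int] m = {
        "a": 1,
        "b": 2,
    }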
/// Formats a [`AccessExpr`](wdl_ast::v1::AccessExpr). pub fn format_access_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) { for child in element.children().expect("access expr children") { @@ -134,12 +299,12 @@ pub fn format_index_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) { for child in element.children().expect("addition expr children") { - let kind = child.element().kind(); - if kind == SyntaxKind::Plus { + let should_end_word = child.element().kind() == SyntaxKind::Plus; + if should_end_word { stream.end_word(); } (&child).write(stream); - if kind == SyntaxKind::Plus { + if should_end_word { stream.end_word(); } } @@ -148,12 +313,12 @@ pub fn format_addition_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) { for child in element.children().expect("multiplication expr children") { - let kind = child.element().kind(); - if kind == SyntaxKind::Asterisk { + let should_end_word = child.element().kind() == SyntaxKind::Asterisk; + if should_end_word { stream.end_word(); } (&child).write(stream); - if kind == SyntaxKind::Asterisk { + if should_end_word { stream.end_word(); } } @@ -162,12 +327,12 @@ pub fn format_multiplication_expr(element: &FormatElement, stream: &mut TokenStr /// Formats a [`LogicalAndExpr`](wdl_ast::v1::LogicalAndExpr). pub fn format_logical_and_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) { for child in element.children().expect("logical and expr children") { - let kind = child.element().kind(); - if kind == SyntaxKind::LogicalAnd { + let should_end_word = child.element().kind() == SyntaxKind::LogicalAnd; + if should_end_word { stream.end_word(); } (&child).write(stream); - if kind == SyntaxKind::LogicalAnd { + if should_end_word { stream.end_word(); } } diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index 6c7b6d84c..4dc4797df 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -3,6 +3,7 @@ use wdl_ast::SyntaxKind; use crate::PreToken; +use crate::Trivia; use crate::TokenStream; use crate::Writable as _; use crate::element::FormatElement; @@ -31,9 +32,11 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< let mut parameter_meta = None; let mut input = None; let mut body = Vec::new(); - let mut runtime = None; let mut command = None; let mut output = None; + let mut requirements = None; + let mut runtime = None; + let mut hints = None; let mut close_brace = None; for child in children { @@ -47,8 +50,8 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< SyntaxKind::ParameterMetadataSectionNode => { parameter_meta = Some(child.clone()); } - SyntaxKind::RuntimeSectionNode => { - runtime = Some(child.clone()); + SyntaxKind::BoundDeclNode => { + body.push(child.clone()); } SyntaxKind::CommandSectionNode => { command = Some(child.clone()); @@ -56,8 +59,14 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< SyntaxKind::OutputSectionNode => { output = Some(child.clone()); } - SyntaxKind::BoundDeclNode => { - body.push(child.clone()); + SyntaxKind::RequirementsSectionNode => { + requirements = Some(child.clone()); + } + SyntaxKind::RuntimeSectionNode => { + runtime = Some(child.clone()); + } + SyntaxKind::TaskHintsSectionNode => { + hints = Some(child.clone()); } SyntaxKind::CloseBrace => { close_brace = Some(child.clone());
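Editor's note: the bucketing above fixes the order in which task sections are emitted, regardless of source order: meta, parameter_meta, input, private declarations, command, output, then requirements (falling back to runtime) and hints, with blank lines between sections. A sketch of the resulting shape, assuming a hypothetical task with the common sections present:

    task example {
        meta {
            description: "illustrative only"
        }

        input {
            String name
        }

        Int x = 1

        command <<<
            echo ~{name} ~{x}
        >>>

        output {
            String out = read_string(stdout())
        }

        runtime {
            docker: "ubuntu:20.04"
        }
    }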
@@ -86,9 +95,13 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< stream.blank_line(); } + let need_blank = !body.is_empty(); for child in body { (&child).write(stream); } + if need_blank { + stream.blank_line(); + } if let Some(command) = command { (&command).write(stream); @@ -100,12 +113,248 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< stream.blank_line(); } - if let Some(runtime) = runtime { + if let Some(requirements) = requirements { + (&requirements).write(stream); + stream.blank_line(); + } else if let Some(runtime) = runtime { (&runtime).write(stream); stream.blank_line(); } + if let Some(hints) = hints { + (&hints).write(stream); + stream.blank_line(); + } + + stream.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine))); + + stream.decrement_indent(); (&close_brace.expect("task close brace")).write(stream); stream.end_line(); } + +/// Formats a [`CommandSection`](wdl_ast::v1::CommandSection). +pub fn format_command_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("command section children"); + + let command_keyword = children.next().expect("command keyword"); + assert!(command_keyword.element().kind() == SyntaxKind::CommandKeyword); + (&command_keyword).write(stream); + stream.end_word(); + + let open_delimiter = children.next().expect("open delimiter"); + match open_delimiter.element().kind() { + SyntaxKind::OpenBrace => { + stream.push_literal_in_place_of_token(open_delimiter.element().as_token().expect("open brace should be token"), "<<<".to_string()); + }, + SyntaxKind::OpenHeredoc => { + (&open_delimiter).write(stream); + }, + _ => { + unreachable!("unexpected open delimiter in command section: {:?}", open_delimiter.element().kind()); + } + } + stream.increment_indent(); + + for child in children { + let kind = child.element().kind(); + if kind == SyntaxKind::CloseBrace { + stream.decrement_indent(); + stream.push_literal_in_place_of_token(child.element().as_token().expect("close brace should be token"), ">>>".to_string()); + } else if kind == SyntaxKind::CloseHeredoc { + stream.decrement_indent(); + (&child).write(stream); + } else { + assert!(matches!(kind, SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode)); + (&child).write(stream); + } + } + stream.end_line(); +} + +/// Formats a [`RequirementsItem`](wdl_ast::v1::RequirementsItem). +pub fn format_requirements_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("requirements item children"); + + let name = children.next().expect("requirements item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("requirements item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("requirements item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} +
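Editor's note: format_command_section above also normalizes command delimiters. A brace-style command has its braces swapped for the heredoc delimiters via push_literal_in_place_of_token, while an existing heredoc is written through unchanged. A hypothetical before/after (not a fixture):

    # before
    command {
        echo ~{name}
    }
    # after
    command <<<
        echo ~{name}
    >>>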
+/// Formats a [`RequirementsSection`](wdl_ast::v1::RequirementsSection). +pub fn format_requirements_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("requirements section children"); + + let requirements_keyword = children.next().expect("requirements keyword"); + assert!(requirements_keyword.element().kind() == SyntaxKind::RequirementsKeyword); + (&requirements_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::RequirementsItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in requirements section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("requirements close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`TaskHintsItem`](wdl_ast::v1::TaskHintsItem). +pub fn format_task_hints_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("task hints item children"); + + let name = children.next().expect("task hints item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("task hints item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("task hints item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`RuntimeItem`](wdl_ast::v1::RuntimeItem). +pub fn format_runtime_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("runtime item children"); + + let name = children.next().expect("runtime item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("runtime item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("runtime item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} +
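Editor's note: requirements, runtime, and hints items all share the shape above: identifier, colon, then value, with end_word after the colon supplying the single separating space. So a runtime section such as this hypothetical one formats with one `name: value` pair per line:

    runtime {
        docker: "ubuntu:20.04"
        cpu: 4
    }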
+/// Formats a [`RuntimeSection`](wdl_ast::v1::RuntimeSection). +pub fn format_runtime_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("runtime section children"); + + let runtime_keyword = children.next().expect("runtime keyword"); + assert!(runtime_keyword.element().kind() == SyntaxKind::RuntimeKeyword); + (&runtime_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::RuntimeItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in runtime section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("runtime close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`TaskHintsSection`](wdl_ast::v1::TaskHintsSection). +pub fn format_task_hints_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) { + let mut children = element.children().expect("task hints section children"); + + let hints_keyword = children.next().expect("hints keyword"); + assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword); + (&hints_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::TaskHintsItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in task hints section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + } + + stream.decrement_indent(); + (&close_brace.expect("task hints close brace")).write(stream); + stream.end_line(); +} diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index 807635818..4b4e50148 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -6,6 +6,7 @@ use wdl_ast::SyntaxKind; use crate::PreToken; use crate::TokenStream; +use crate::Trivia; use crate::Writable as _; use crate::element::FormatElement; @@ -169,15 +170,21 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr stream.blank_line(); } + let need_blank = !body.is_empty(); for child in body { (&child).write(stream); } + if need_blank { + stream.blank_line(); + } if let Some(output) = output { (&output).write(stream); stream.blank_line(); } + stream.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine))); + stream.decrement_indent(); (&close_brace.expect("workflow close brace")).write(stream); stream.end_line(); diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index c8eeb4953..ce3956972 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -36,15 +36,16 @@ pub fn format_call_input_item(element: &FormatElement, stream: &mut TokenStream< let name = children.next().expect("call input item name"); (&name).write(stream); - 
stream.end_word(); + // Don't call end_word() here in case the name is alone - let equals = children.next(); - if let Some(equals) = equals { + if let Some(equals) = children.next() { + stream.end_word(); (&equals).write(stream); stream.end_word(); let value = children.next().expect("call input item value"); (&value).write(stream); + assert!(children.next().is_none()); } } diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl new file mode 100644 index 000000000..c781b9178 --- /dev/null +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -0,0 +1,3626 @@ +version 1.0 + +struct RuntimeEnvironment { + String docker + String singularity + String conda +} + +workflow chip { + + meta { + version: "v2.2.2" + + author: "Jin wook Lee" + email: "leepc12@gmail.com" + description: "ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil." + organization: "ENCODE DCC" + + specification_document: "https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing" + + default_docker: "encodedcc/chip-seq-pipeline:v2.2.2" + default_singularity: "https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif" + croo_out_def: "https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json" + + parameter_group: { + runtime_environment: { + title: "Runtime environment", + description: "Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.", + }, + pipeline_metadata: { + title: "Pipeline metadata", + description: "Metadata for a pipeline (e.g. title and description).", + }, + reference_genome: { + title: "Reference genome", + description: "Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.", + help: "Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.", + }, + input_genomic_data: { + title: "Input genomic data", + description: "Genomic input files for experiment.", + help: "Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].", + }, + input_genomic_data_control: { + title: "Input genomic data (control)", + description: "Genomic input files for control. 
TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.", + help: "Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.", + }, + pipeline_parameter: { + title: "Pipeline parameter", + description: "Pipeline type and flags to turn on/off analyses.", + help: "Use chip.align_only to align FASTQs without peak calling.", + }, + alignment: { + title: "Alignment", + description: "Parameters for alignment.", + help: "Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.", + }, + peak_calling: { + title: "Peak calling", + description: "Parameters for peak calling.", + help: "This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.", + }, + resource_parameter: { + title: "Resource parameter", + description: "Number of CPUs (threads), max. memory and walltime for tasks.", + help: "Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.", + }, + } + } + + parameter_meta { + docker: { + description: "Default Docker image URI to run WDL tasks.", + group: "runtime_environment", + example: "ubuntu:20.04", + } + singularity: { + description: "Default Singularity image URI to run WDL tasks. For Singularity users only.", + group: "runtime_environment", + example: "docker://ubuntu:20.04", + } + conda: { + description: "Default Conda environment name to run WDL tasks. For Conda users only.", + group: "runtime_environment", + example: "encd-chip", + } + conda_macs2: { + description: "Conda environment name for task macs2. For Conda users only.", + group: "runtime_environment", + example: "encd-chip-macs2", + } + conda_spp: { + description: "Conda environment name for tasks spp/xcor. For Conda users only.", + group: "runtime_environment", + example: "encd-chip-spp", + } + title: { + description: "Experiment title.", + group: "pipeline_metadata", + example: "ENCSR936XTK (subsampled 1/50)", + } + description: { + description: "Experiment description.", + group: "pipeline_metadata", + example: "ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)", + } + genome_tsv: { + description: "Reference genome database TSV.", + group: "reference_genome", + help: "This TSV files includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). 
You can still invidiaully define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.", + example: "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv", + } + genome_name: { + description: "Genome name.", + group: "reference_genome", + } + ref_fa: { + description: "Reference FASTA file.", + group: "reference_genome", + } + bowtie2_idx_tar: { + description: "BWA index TAR file.", + group: "reference_genome", + } + custom_aligner_idx_tar: { + description: "Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.", + group: "reference_genome", + } + chrsz: { + description: "2-col chromosome sizes file.", + group: "reference_genome", + } + blacklist: { + description: "Blacklist file in BED format.", + group: "reference_genome", + help: "Peaks will be filtered with this file.", + } + blacklist2: { + description: "Secondary blacklist file in BED format.", + group: "reference_genome", + help: "If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.", + } + mito_chr_name: { + description: "Mitochondrial chromosome name.", + group: "reference_genome", + help: "e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.", + } + regex_bfilt_peak_chr_name: { + description: "Reg-ex for chromosomes to keep while filtering peaks.", + group: "reference_genome", + help: "Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.", + } + gensz: { + description: "Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd columnin chromosome sizes file.", + group: "reference_genome", + } + paired_end: { + description: "Sequencing endedness.", + group: "input_genomic_data", + help: "Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.", + example: true, + } + paired_ends: { + description: "Sequencing endedness array (for mixed SE/PE datasets).", + group: "input_genomic_data", + help: "Whether each biological replicate is paired ended or not.", + } + fastqs_rep1_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 1.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from FASTQs files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type and fill paramters for that type and leave other undefined. Especially for FASTQs, we have individual variable for each biological replicate to allow FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz", + ], + } + fastqs_rep1_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 1.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). 
These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz", + ], + } + fastqs_rep2_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 2.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz", + ], + } + fastqs_rep2_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 2.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz", + ], + } + fastqs_rep3_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 3.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep3_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 3.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep4_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 4.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep4_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 4.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep5_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 5.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep5_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 5.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep6_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 6.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep6_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 6.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep7_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 7.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). 
These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep7_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 7.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep8_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 8.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep8_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 8.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep9_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 9.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep9_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 9.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep10_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 10.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep10_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 10.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.", + } + bams: { + description: "List of unfiltered/raw BAM files for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].", + } + nodup_bams: { + description: "List of filtered/deduped BAM files for each biological replicate", + group: "input_genomic_data", + help: "Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].", + } + tas: { + description: "List of TAG-ALIGN files for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].", + } + peaks: { + description: "List of NARROWPEAK files (not blacklist filtered) for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. 
If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.", + } + peaks_pr1: { + description: "List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.", + } + peaks_pr2: { + description: "List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.", + } + peak_pooled: { + description: "NARROWPEAK file for pooled true replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.", + } + peak_ppr1: { + description: "NARROWPEAK file for pooled pseudo replicate 1.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.", + } + peak_ppr2: { + description: "NARROWPEAK file for pooled pseudo replicate 2.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.", + } + + ctl_paired_end: { + description: "Sequencing endedness for all controls.", + group: "input_genomic_data_control", + help: "Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.", + } + ctl_paired_ends: { + description: "Sequencing endedness array for mixed SE/PE controls.", + group: "input_genomic_data_control", + help: "Whether each control replicate is paired ended or not.", + } + ctl_fastqs_rep1_R1: { + description: "Read1 FASTQs to be merged for a control replicate 1.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from FASTQs files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type and fill paramters for that type and leave other undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep1_R2: { + description: "Read2 FASTQs to be merged for a control replicate 1.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). 
These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep2_R1: { + description: "Read1 FASTQs to be merged for a control replicate 2.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep2_R2: { + description: "Read2 FASTQs to be merged for a control replicate 2.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep3_R1: { + description: "Read1 FASTQs to be merged for a control replicate 3.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep3_R2: { + description: "Read2 FASTQs to be merged for a control replicate 3.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep4_R1: { + description: "Read1 FASTQs to be merged for a control replicate 4.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep4_R2: { + description: "Read2 FASTQs to be merged for a control replicate 4.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep5_R1: { + description: "Read1 FASTQs to be merged for a control replicate 5.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep5_R2: { + description: "Read2 FASTQs to be merged for a control replicate 5.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep6_R1: { + description: "Read1 FASTQs to be merged for a control replicate 6.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep6_R2: { + description: "Read2 FASTQs to be merged for a control replicate 6.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). 
These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep7_R1: { + description: "Read1 FASTQs to be merged for a control replicate 7.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep7_R2: { + description: "Read2 FASTQs to be merged for a control replicate 7.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep8_R1: { + description: "Read1 FASTQs to be merged for a control replicate 8.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep8_R2: { + description: "Read2 FASTQs to be merged for a control replicate 8.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep9_R1: { + description: "Read1 FASTQs to be merged for a control replicate 9.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep9_R2: { + description: "Read2 FASTQs to be merged for a control replicate 9.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep10_R1: { + description: "Read1 FASTQs to be merged for a control replicate 10.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep10_R2: { + description: "Read2 FASTQs to be merged for a control replicate 10.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_bams: { + description: "List of unfiltered/raw BAM files for each control replicate.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].", + } + ctl_nodup_bams: { + description: "List of filtered/deduped BAM files for each control replicate", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].", + } + ctl_tas: { + description: "List of TAG-ALIGN files for each biological replicate.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].", + } + + pipeline_type: { + description: "Pipeline type. 
tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.", + group: "pipeline_parameter", + help: "Default peak caller is different for each type. spp For TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.", + choices: [ + "tf", + "histone", + "control", + ], + example: "tf", + } + redact_nodup_bam: { + description: "Redact filtered/nodup BAM.", + group: "pipeline_parameter", + help: "Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.", + } + align_only: { + description: "Align only mode.", + group: "pipeline_parameter", + help: "Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.", + } + true_rep_only: { + description: "Disables all analyses related to pseudo-replicates.", + group: "pipeline_parameter", + help: "Pipeline generates 2 pseudo-replicate from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).", + } + enable_count_signal_track: { + description: "Enables generation of count signal tracks.", + group: "pipeline_parameter", + } + enable_jsd: { + description: "Enables Jensen-Shannon Distance (JSD) plot generation.", + group: "pipeline_parameter", + } + enable_gc_bias: { + description: "Enables GC bias calculation.", + group: "pipeline_parameter", + } + + aligner: { + description: "Aligner. bowtie2, bwa or custom", + group: "alignment", + help: "It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.", + choices: [ + "bowtie2", + "bwa", + "custom", + ], + example: "bowtie2", + } + custom_align_py: { + description: "Python script for a custom aligner.", + group: "alignment", + help: "There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".", + } + use_bwa_mem_for_pe: { + description: "For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.", + group: "alignment", + help: "Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. 
Otherwise keep using bwa aln.", + } + bwa_mem_read_len_limit: { + description: "Read length limit for bwa mem (for PE FASTQs only).", + group: "alignment", + help: "If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.", + } + use_bowtie2_local_mode: { + description: "Use bowtie2\'s local mode (soft-clipping).", + group: "alignment", + help: "This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.", + } + crop_length: { + description: "Crop FASTQs\' reads longer than this length.", + group: "alignment", + help: "Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.", + } + crop_length_tol: { + description: "Tolerance for cropping reads in FASTQs.", + group: "alignment", + help: "Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.", + } + trimmomatic_phred_score_format: { + description: "Base encoding (format) for Phred score in FASTQs.", + group: "alignment", + choices: [ + "auto", + "phred33", + "phred64", + ], + help: "This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detect it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".", + } + xcor_trim_bp: { + description: "Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.", + group: "alignment", + help: "This does not affect alignment of experimental/control replicates. Pipeline additionaly aligns R1 FASTQ only for cross-correlation analysis only. This parameter is used for it.", + } + use_filt_pe_ta_for_xcor: { + description: "Use filtered PE BAM for cross-correlation analysis.", + group: "alignment", + help: "If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.", + } + dup_marker: { + description: "Marker for duplicate reads. picard or sambamba.", + group: "alignment", + help: "picard for Picard MarkDuplicates or sambamba for sambamba markdup.", + choices: [ + "picard", + "sambamba", + ], + example: "picard", + } + no_dup_removal: { + description: "Disable removal of duplicate reads during filtering BAM.", + group: "alignment", + help: "Duplicate reads are filtererd out during filtering BAMs to gerenate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.", + } + mapq_thresh: { + description: "Threshold for low MAPQ reads removal.", + group: "alignment", + help: "Low MAPQ reads are filtered out while filtering BAM.", + } + filter_chrs: { + description: "List of chromosomes to be filtered out while filtering BAM.", + group: "alignment", + help: "It is empty by default, hence no filtering out of specfic chromosomes. It is case-sensitive. Use exact word for chromosome names.", + } + subsample_reads: { + description: "Subsample reads. Shuffle and subsample reads.", + group: "alignment", + help: "This affects all downstream analyses after filtering experiment BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.", + } + ctl_subsample_reads: { + description: "Subsample control reads. 
+ ctl_subsample_reads: { + description: "Subsample control reads. Shuffle and subsample control reads.", + group: "alignment", + help: "This affects all downstream analyses after filtering control BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.", + } + xcor_subsample_reads: { + description: "Subsample reads for cross-correlation analysis only.", + group: "alignment", + help: "This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.", + } + xcor_exclusion_range_min: { + description: "Exclusion minimum for cross-correlation analysis.", + group: "alignment", + help: "For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.", + } + xcor_exclusion_range_max: { + description: "Exclusion maximum for cross-correlation analysis.", + group: "alignment", + help: "For run_spp.R -s. If not defined, default values of `max(read_len + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.", + } + pseudoreplication_random_seed: { + description: "Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).", + group: "alignment", + help: "Pseudo-replication (task spr) is done by using GNU \"shuf --random-source=sha256(random_seed)\". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.", + } + ctl_depth_limit: { + description: "Hard limit for chosen control\'s depth.", + group: "peak_calling", + help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.", + } + exp_ctl_depth_ratio_limit: { + description: "Second limit for chosen control\'s depth.", + group: "peak_calling", + help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate\'s read depth multiplied by this factor, then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.", + } + fraglen: { + description: "Fragment length for each biological replicate.", + group: "peak_calling", + help: "Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.", + }
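The xcor_exclusion_range_max default quoted above, `max(read_len + 10, 50)` for TF, has to be spelled out in WDL 1.0, which has no max() in its standard library. A small illustrative sketch under that assumption:

    version 1.0

    workflow xcor_exclusion_default {
        input {
            Int read_len = 36
        }
        # max(read_len + 10, 50), written as an if-expression
        Int exclusion_range_max = if read_len + 10 > 50 then read_len + 10 else 50
        output {
            Int value = exclusion_range_max
        }
    }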
+ peak_caller: { + description: "Peak caller.", + group: "peak_calling", + help: "It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. For example, you can use macs2 for TF ChIP-Seq even though spp is the default for TF ChIP-Seq (chip.pipeline_type == tf).", + example: "spp", + } + always_use_pooled_ctl: { + description: "Always choose a pooled control for each experiment replicate.", + group: "peak_calling", + help: "If turned on, ignores chip.ctl_depth_ratio.", + } + ctl_depth_ratio: { + description: "Maximum depth ratio between control replicates.", + group: "peak_calling", + help: "If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.", + } + + cap_num_peak: { + description: "Upper limit on the number of peaks.", + group: "peak_calling", + help: "It is 300000 and 500000 by default for spp and macs2, respectively.", + } + pval_thresh: { + description: "p-value threshold for MACS2 peak caller.", + group: "peak_calling", + help: "macs2 callpeak -p", + } + fdr_thresh: { + description: "FDR threshold for spp peak caller (phantompeakqualtools).", + group: "peak_calling", + help: "run_spp.R -fdr=", + } + idr_thresh: { + description: "IDR threshold.", + group: "peak_calling", + } + + align_cpu: { + description: "Number of cores for task align.", + group: "resource_parameter", + help: "Task align merges/crops/maps FASTQs.", + } + align_bowtie2_mem_factor: { + description: "Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + align_bwa_mem_factor: { + description: "Multiplication factor to determine memory required for task align with bwa as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + align_time_hr: { + description: "Walltime (h) required for task align.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + align_bowtie2_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.", + } + align_bwa_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task align with bwa as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.", + } + filter_cpu: { + description: "Number of cores for task filter.", + group: "resource_parameter", + help: "Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.", + } + filter_mem_factor: { + description: "Multiplication factor to determine memory required for task filter.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + filter_time_hr: { + description: "Walltime (h) required for task filter.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + }
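The peak_caller help above pairs with the workflow's own peak_caller_ temp variable defined later in this file. A hedged, stand-alone sketch of the same default-selection logic:

    version 1.0

    workflow peak_caller_default {
        input {
            String pipeline_type = "tf"
            String? peak_caller
        }
        # an explicit peak_caller wins; otherwise spp for tf, macs2 for histone
        String peak_caller_ = select_first([
            peak_caller,
            if pipeline_type == "tf" then "spp" else "macs2",
        ])
        output {
            String chosen = peak_caller_
        }
    }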
+ filter_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task filter.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.", + } + bam2ta_cpu: { + description: "Number of cores for task bam2ta.", + group: "resource_parameter", + help: "Task bam2ta converts filtered/deduped BAM into TAG-ALIGN (6-col BED) format.", + } + bam2ta_mem_factor: { + description: "Multiplication factor to determine memory required for task bam2ta.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + bam2ta_time_hr: { + description: "Walltime (h) required for task bam2ta.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + bam2ta_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task bam2ta.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + spr_mem_factor: { + description: "Multiplication factor to determine memory required for task spr.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + spr_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task spr.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + jsd_cpu: { + description: "Number of cores for task jsd.", + group: "resource_parameter", + help: "Task jsd plots Jensen-Shannon distance and metrics related to it.", + } + jsd_mem_factor: { + description: "Multiplication factor to determine memory required for task jsd.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + jsd_time_hr: { + description: "Walltime (h) required for task jsd.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + jsd_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task jsd.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + xcor_cpu: { + description: "Number of cores for task xcor.", + group: "resource_parameter", + help: "Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.", + } + xcor_mem_factor: { + description: "Multiplication factor to determine memory required for task xcor.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + xcor_time_hr: { + description: "Walltime (h) required for task xcor.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + }
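All of the *_mem_factor/*_disk_factor parameters above scale resources off input size, the same way task align computes mem_gb and disk_gb near the end of this file. A minimal hypothetical task showing the pattern (names and constants are illustrative):

    version 1.0

    task sized_by_factors {
        input {
            Array[File] inputs
            Float mem_factor = 0.4
            Float disk_factor = 8.0
        }
        Float input_gb = size(inputs, "G")
        # base + factor * input size, as in task align below
        Float mem_gb = 5.0 + mem_factor * input_gb
        Int disk_gb = round(40.0 + disk_factor * input_gb)
        command <<<
            echo "mem=~{mem_gb}G disk=~{disk_gb}G"
        >>>
        runtime {
            memory: "~{mem_gb} GB"
            disks: "local-disk ~{disk_gb} SSD"
        }
    }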
+ xcor_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task xcor.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + subsample_ctl_mem_factor: { + description: "Multiplication factor to determine memory required for task subsample_ctl.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + subsample_ctl_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task subsample_ctl.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + call_peak_cpu: { + description: "Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.", + group: "resource_parameter", + help: "Task call_peak calls peaks on TAG-ALIGNs by using SPP/MACS2 peak caller. MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.", + } + call_peak_spp_mem_factor: { + description: "Multiplication factor to determine memory required for task call_peak with spp as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + call_peak_macs2_mem_factor: { + description: "Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + call_peak_time_hr: { + description: "Walltime (h) required for task call_peak.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + call_peak_spp_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + call_peak_macs2_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + macs2_signal_track_mem_factor: { + description: "Multiplication factor to determine memory required for task macs2_signal_track.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + macs2_signal_track_time_hr: { + description: "Walltime (h) required for task macs2_signal_track.", + group: "resource_parameter", + help: "This is for HPCs only. e.g.
SLURM, SGE, ...", + } + macs2_signal_track_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task macs2_signal_track.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + align_trimmomatic_java_heap: { + description: "Maximum Java heap (java -Xmx) in task align.", + group: "resource_parameter", + help: "Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.", + } + filter_picard_java_heap: { + description: "Maximum Java heap (java -Xmx) in task filter.", + group: "resource_parameter", + help: "Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.", + } + gc_bias_picard_java_heap: { + description: "Maximum Java heap (java -Xmx) in task gc_bias.", + group: "resource_parameter", + help: "Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.", + } + } + + input { + # group: runtime_environment + String docker = "encodedcc/chip-seq-pipeline:v2.2.2" + String singularity = "https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif" + String conda = "encd-chip" + String conda_macs2 = "encd-chip-macs2" + String conda_spp = "encd-chip-spp" + + # group: pipeline_metadata + String title = "Untitled" + String description = "No description" + + # group: reference_genome + File? genome_tsv + String? genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [ + ] + Array[File] fastqs_rep1_R1 = [ + ] + Array[File] fastqs_rep1_R2 = [ + ] + Array[File] fastqs_rep2_R1 = [ + ] + Array[File] fastqs_rep2_R2 = [ + ] + Array[File] fastqs_rep3_R1 = [ + ] + Array[File] fastqs_rep3_R2 = [ + ] + Array[File] fastqs_rep4_R1 = [ + ] + Array[File] fastqs_rep4_R2 = [ + ] + Array[File] fastqs_rep5_R1 = [ + ] + Array[File] fastqs_rep5_R2 = [ + ] + Array[File] fastqs_rep6_R1 = [ + ] + Array[File] fastqs_rep6_R2 = [ + ] + Array[File] fastqs_rep7_R1 = [ + ] + Array[File] fastqs_rep7_R2 = [ + ] + Array[File] fastqs_rep8_R1 = [ + ] + Array[File] fastqs_rep8_R2 = [ + ] + Array[File] fastqs_rep9_R1 = [ + ] + Array[File] fastqs_rep9_R2 = [ + ] + Array[File] fastqs_rep10_R1 = [ + ] + Array[File] fastqs_rep10_R2 = [ + ] + Array[File] bams = [ + ] + Array[File] nodup_bams = [ + ] + Array[File] tas = [ + ] + Array[File] peaks = [ + ] + Array[File] peaks_pr1 = [ + ] + Array[File] peaks_pr2 = [ + ] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + + Boolean? 
ctl_paired_end + Array[Boolean] ctl_paired_ends = [ + ] + Array[File] ctl_fastqs_rep1_R1 = [ + ] + Array[File] ctl_fastqs_rep1_R2 = [ + ] + Array[File] ctl_fastqs_rep2_R1 = [ + ] + Array[File] ctl_fastqs_rep2_R2 = [ + ] + Array[File] ctl_fastqs_rep3_R1 = [ + ] + Array[File] ctl_fastqs_rep3_R2 = [ + ] + Array[File] ctl_fastqs_rep4_R1 = [ + ] + Array[File] ctl_fastqs_rep4_R2 = [ + ] + Array[File] ctl_fastqs_rep5_R1 = [ + ] + Array[File] ctl_fastqs_rep5_R2 = [ + ] + Array[File] ctl_fastqs_rep6_R1 = [ + ] + Array[File] ctl_fastqs_rep6_R2 = [ + ] + Array[File] ctl_fastqs_rep7_R1 = [ + ] + Array[File] ctl_fastqs_rep7_R2 = [ + ] + Array[File] ctl_fastqs_rep8_R1 = [ + ] + Array[File] ctl_fastqs_rep8_R2 = [ + ] + Array[File] ctl_fastqs_rep9_R1 = [ + ] + Array[File] ctl_fastqs_rep9_R2 = [ + ] + Array[File] ctl_fastqs_rep10_R1 = [ + ] + Array[File] ctl_fastqs_rep10_R2 = [ + ] + Array[File] ctl_bams = [ + ] + Array[File] ctl_nodup_bams = [ + ] + Array[File] ctl_tas = [ + ] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = "bowtie2" + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = "auto" + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = "picard" + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [ + ] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [ + ] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? 
gc_bias_picard_java_heap + } + + String pipeline_ver = "v2.2.2" + RuntimeEnvironment runtime_environment = { + "docker": docker, + "singularity": singularity, + "conda": conda, + } + RuntimeEnvironment runtime_environment_spp = { + "docker": docker, + "singularity": singularity, + "conda": conda_spp, + } + RuntimeEnvironment runtime_environment_macs2 = { + "docker": docker, + "singularity": singularity, + "conda": conda_macs2, + } + + # read genome data and paths + if (defined(genome_tsv)) { + call read_genome_tsv { input: + genome_tsv = genome_tsv, + runtime_environment = runtime_environment, + } + } + File ref_fa_ = select_first([ + ref_fa, + read_genome_tsv.ref_fa, + ]) + File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar else read_genome_tsv.bwa_idx_tar + File bowtie2_idx_tar_ = select_first([ + bowtie2_idx_tar, + read_genome_tsv.bowtie2_idx_tar, + ]) + File chrsz_ = select_first([ + chrsz, + read_genome_tsv.chrsz, + ]) + String gensz_ = select_first([ + gensz, + read_genome_tsv.gensz, + ]) + File? blacklist1_ = if defined(blacklist) then blacklist else read_genome_tsv.blacklist + File? blacklist2_ = if defined(blacklist2) then blacklist2 else read_genome_tsv.blacklist2 + # merge multiple blacklists + # two blacklists can have different number of columns (3 vs 6) + # so we limit merged blacklist's columns to 3 + Array[File] blacklists = select_all([ + blacklist1_, + blacklist2_, + ]) + if (length(blacklists) > 1) { + call pool_ta as pool_blacklist { input: + tas = blacklists, + col = 3, + runtime_environment = runtime_environment, + } + } + File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled else if length(blacklists) > 0 then blacklists[0] else blacklist2_ + String mito_chr_name_ = select_first([ + mito_chr_name, + read_genome_tsv.mito_chr_name, + ]) + String regex_bfilt_peak_chr_name_ = select_first([ + regex_bfilt_peak_chr_name, + read_genome_tsv.regex_bfilt_peak_chr_name, + ]) + String genome_name_ = select_first([ + genome_name, + read_genome_tsv.genome_name, + basename(chrsz_), + ]) + + ### temp vars (do not define these) + String aligner_ = if defined(custom_align_py) then "custom" else aligner + String peak_caller_ = if pipeline_type == "tf" then select_first([ + peak_caller, + "spp", + ]) else select_first([ + peak_caller, + "macs2", + ]) + String peak_type_ = if peak_caller_ == "spp" then "regionPeak" else "narrowPeak" + Boolean enable_idr = pipeline_type == "tf" # enable_idr for TF chipseq only + String idr_rank_ = if peak_caller_ == "spp" then "signal.value" else if peak_caller_ == "macs2" then "p.value" else "p.value" + Int cap_num_peak_spp = 300000 + Int cap_num_peak_macs2 = 500000 + Int cap_num_peak_ = if peak_caller_ == "spp" then select_first([ + cap_num_peak, + cap_num_peak_spp, + ]) else select_first([ + cap_num_peak, + cap_num_peak_macs2, + ]) + Int mapq_thresh_ = mapq_thresh + Boolean enable_xcor_ = if pipeline_type == "control" then false else true + Boolean enable_count_signal_track_ = if pipeline_type == "control" then false else enable_count_signal_track + Boolean enable_jsd_ = if pipeline_type == "control" then false else enable_jsd + Boolean enable_gc_bias_ = if pipeline_type == "control" then false else enable_gc_bias + Boolean align_only_ = if pipeline_type == "control" then true else align_only + + Float align_mem_factor_ = if aligner_ == "bowtie2" then align_bowtie2_mem_factor else align_bwa_mem_factor + Float align_disk_factor_ = if aligner_ == "bowtie2" then align_bowtie2_disk_factor else align_bwa_disk_factor + Float 
call_peak_mem_factor_ = if peak_caller_ == "spp" then call_peak_spp_mem_factor else call_peak_macs2_mem_factor + Float call_peak_disk_factor_ = if peak_caller_ == "spp" then call_peak_spp_disk_factor else call_peak_macs2_disk_factor + + # temporary 2-dim fastqs array [rep_id][merge_id] + Array[Array[File]] fastqs_R1 = if length(fastqs_rep10_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + fastqs_rep10_R1, + ] else if length(fastqs_rep9_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + ] else if length(fastqs_rep8_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + ] else if length(fastqs_rep7_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + ] else if length(fastqs_rep6_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + ] else if length(fastqs_rep5_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + ] else if length(fastqs_rep4_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + ] else if length(fastqs_rep3_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + ] else if length(fastqs_rep2_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + ] else if length(fastqs_rep1_R1) > 0 then [ + fastqs_rep1_R1, + ] else [ + ] + # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep) + Array[Array[File]] fastqs_R2 = [ + fastqs_rep1_R2, + fastqs_rep2_R2, + fastqs_rep3_R2, + fastqs_rep4_R2, + fastqs_rep5_R2, + fastqs_rep6_R2, + fastqs_rep7_R2, + fastqs_rep8_R2, + fastqs_rep9_R2, + fastqs_rep10_R2, + ]
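The long if/else chain above truncates the per-replicate FASTQ arrays at the highest replicate that actually has R1 files. A three-replicate sketch of the same idiom (hypothetical names, not part of the patch):

    version 1.0

    workflow truncate_rep_arrays {
        input {
            Array[File] rep1_R1 = []
            Array[File] rep2_R1 = []
            Array[File] rep3_R1 = []
        }
        # keep a prefix of replicates up to the last non-empty R1 array
        Array[Array[File]] fastqs_R1 = if length(rep3_R1) > 0 then [ rep1_R1, rep2_R1, rep3_R1 ]
            else if length(rep2_R1) > 0 then [ rep1_R1, rep2_R1 ]
            else if length(rep1_R1) > 0 then [ rep1_R1 ]
            else []
        output {
            Int num_rep = length(fastqs_R1)
        }
    }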
+ + # temporary 2-dim ctl fastqs array [rep_id][merge_id] + Array[Array[File]] ctl_fastqs_R1 = if length(ctl_fastqs_rep10_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ctl_fastqs_rep10_R1, + ] else if length(ctl_fastqs_rep9_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ] else if length(ctl_fastqs_rep8_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ] else if length(ctl_fastqs_rep7_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ] else if length(ctl_fastqs_rep6_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ] else if length(ctl_fastqs_rep5_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ] else if length(ctl_fastqs_rep4_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ] else if length(ctl_fastqs_rep3_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ] else if length(ctl_fastqs_rep2_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ] else if length(ctl_fastqs_rep1_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ] else [ + ] + # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep) + Array[Array[File]] ctl_fastqs_R2 = [ + ctl_fastqs_rep1_R2, + ctl_fastqs_rep2_R2, + ctl_fastqs_rep3_R2, + ctl_fastqs_rep4_R2, + ctl_fastqs_rep5_R2, + ctl_fastqs_rep6_R2, + ctl_fastqs_rep7_R2, + ctl_fastqs_rep8_R2, + ctl_fastqs_rep9_R2, + ctl_fastqs_rep10_R2, + ] + + # temporary variables to get number of replicates + # WDLic implementation of max(A,B,C,...) + Int num_rep_fastq = length(fastqs_R1) + Int num_rep_bam = if length(bams) < num_rep_fastq then num_rep_fastq else length(bams) + Int num_rep_nodup_bam = if length(nodup_bams) < num_rep_bam then num_rep_bam else length(nodup_bams) + Int num_rep_ta = if length(tas) < num_rep_nodup_bam then num_rep_nodup_bam else length(tas) + Int num_rep_peak = if length(peaks) < num_rep_ta then num_rep_ta else length(peaks) + Int num_rep = num_rep_peak + + # temporary variables to get number of controls + Int num_ctl_fastq = length(ctl_fastqs_R1) + Int num_ctl_bam = if length(ctl_bams) < num_ctl_fastq then num_ctl_fastq else length(ctl_bams) + Int num_ctl_nodup_bam = if length(ctl_nodup_bams) < num_ctl_bam then num_ctl_bam else length(ctl_nodup_bams) + Int num_ctl_ta = if length(ctl_tas) < num_ctl_nodup_bam then num_ctl_nodup_bam else length(ctl_tas) + Int num_ctl = num_ctl_ta + + # sanity check for inputs + if (num_rep == 0 && num_ctl == 0) { + call raise_exception as error_input_data { input: + msg = "No FASTQ/BAM/TAG-ALIGN/PEAK defined in your input JSON. Check if your FASTQs are defined as \"chip.fastqs_repX_RY\". DO NOT MISS suffix _R1 even for single ended FASTQ.", + runtime_environment = runtime_environment, + } + } + if (!align_only_ && peak_caller_ == "spp" && num_ctl == 0) { + call raise_exception as error_control_required { input: + msg = "SPP requires control inputs. Define control input files (\"chip.ctl_*\") in an input JSON file.", + runtime_environment = runtime_environment, + } + }
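The "WDLic implementation of max(A,B,C,...)" above folds pairwise comparisons because WDL 1.0 has no variadic max(). A reduced sketch of the same fold:

    version 1.0

    workflow wdl_max {
        input {
            Array[File] fastqs = []
            Array[File] bams = []
            Array[File] tas = []
        }
        # max(length(fastqs), length(bams), length(tas)) via pairwise folds
        Int n1 = length(fastqs)
        Int n2 = if length(bams) < n1 then n1 else length(bams)
        Int num_rep = if length(tas) < n2 then n2 else length(tas)
        output {
            Int n = num_rep
        }
    }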
+ if ((num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != "bwa" && aligner_ != "bowtie2" && aligner_ != "custom") { + call raise_exception as error_wrong_aligner { input: + msg = "Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.", + runtime_environment = runtime_environment, + } + } + if (aligner_ != "bwa" && use_bwa_mem_for_pe) { + call raise_exception as error_use_bwa_mem_for_non_bwa { input: + msg = "To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.", + runtime_environment = runtime_environment, + } + } + if (aligner_ != "bowtie2" && use_bowtie2_local_mode) { + call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input: + msg = "To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.", + runtime_environment = runtime_environment, + } + } + if (aligner_ == "custom" && (!defined(custom_align_py) || !defined(custom_aligner_idx_tar))) { + call raise_exception as error_custom_aligner { input: + msg = "To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.", + runtime_environment = runtime_environment, + } + } + + if ((ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0) && num_ctl > 1 && length(ctl_paired_ends) > 1) { + call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input: + msg = "Cannot use automatic control subsampling (\"chip.ctl_depth_limit\">0 and \"chip.exp_ctl_depth_ratio_limit\">0) for " + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). " + "Automatic control subsampling is enabled by default. " + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. " + "You can still use manual control subsampling (\"chip.ctl_subsample_reads\">0) since it is done " + "for individual control\'s TAG-ALIGN output according to each control\'s endedness. ", + runtime_environment = runtime_environment, + } + } + if (pipeline_type == "control" && num_ctl > 0) { + call raise_exception as error_ctl_input_defined_in_control_mode { input: + msg = "In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.", + runtime_environment = runtime_environment, + } + } + if (pipeline_type == "control" && num_rep_fastq == 0) { + call raise_exception as error_ctl_fastq_input_required_for_control_mode { input: + msg = "Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY.
Pipeline will recognize them as control FASTQs.", + runtime_environment = runtime_environment, + } + } + + # align each replicate + scatter (i in range(num_rep)) { + # to override endedness definition for individual replicate + # paired_end will override paired_ends[i] + Boolean paired_end_ = if !defined(paired_end) && i < length(paired_ends) then paired_ends[i] else select_first([ + paired_end, + ]) + + Boolean has_input_of_align = i < length(fastqs_R1) && length(fastqs_R1[i]) > 0 + Boolean has_output_of_align = i < length(bams) + if (has_input_of_align && !has_output_of_align) { + call align { input: + fastqs_R1 = fastqs_R1[i], + fastqs_R2 = if paired_end_ then fastqs_R2[i] else [ + ], + crop_length = crop_length, + crop_length_tol = crop_length_tol, + trimmomatic_phred_score_format = trimmomatic_phred_score_format, + + aligner = aligner_, + mito_chr_name = mito_chr_name_, + custom_align_py = custom_align_py, + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + paired_end = paired_end_, + use_bwa_mem_for_pe = use_bwa_mem_for_pe, + bwa_mem_read_len_limit = bwa_mem_read_len_limit, + use_bowtie2_local_mode = use_bowtie2_local_mode, + ref_fa = ref_fa_, + + trimmomatic_java_heap = align_trimmomatic_java_heap, + cpu = align_cpu, + mem_factor = align_mem_factor_, + time_hr = align_time_hr, + disk_factor = align_disk_factor_, + runtime_environment = runtime_environment, + } + } + File? bam_ = if has_output_of_align then bams[i] else align.bam + + Boolean has_input_of_filter = has_output_of_align || defined(align.bam) + Boolean has_output_of_filter = i < length(nodup_bams) + # skip if we already have output of this step + if (has_input_of_filter && !has_output_of_filter) { + call filter { input: + bam = bam_, + paired_end = paired_end_, + ref_fa = ref_fa_, + redact_nodup_bam = redact_nodup_bam, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = no_dup_removal, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + } + File? nodup_bam_ = if has_output_of_filter then nodup_bams[i] else filter.nodup_bam + + Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam) + Boolean has_output_of_bam2ta = i < length(tas) + if (has_input_of_bam2ta && !has_output_of_bam2ta) { + call bam2ta { input: + bam = nodup_bam_, + subsample = subsample_reads, + paired_end = paired_end_, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + File? 
ta_ = if has_output_of_bam2ta then tas[i] else bam2ta.ta + + Boolean has_input_of_spr = has_output_of_bam2ta || defined(bam2ta.ta) + if (has_input_of_spr && !align_only_ && !true_rep_only) { + call spr { input: + ta = ta_, + paired_end = paired_end_, + pseudoreplication_random_seed = pseudoreplication_random_seed, + mem_factor = spr_mem_factor, + disk_factor = spr_disk_factor, + runtime_environment = runtime_environment, + } + } + + Boolean has_input_of_count_signal_track = has_output_of_bam2ta || defined(bam2ta.ta) + if (has_input_of_count_signal_track && enable_count_signal_track_) { + # generate count signal track + call count_signal_track { input: + ta = ta_, + chrsz = chrsz_, + runtime_environment = runtime_environment, + } + } + + if (enable_gc_bias_ && defined(nodup_bam_) && defined(ref_fa_)) { + call gc_bias { input: + nodup_bam = nodup_bam_, + ref_fa = ref_fa_, + picard_java_heap = gc_bias_picard_java_heap, + runtime_environment = runtime_environment, + } + } + + # special trimming/mapping for xcor (when starting from FASTQs) + if (has_input_of_align) { + call align as align_R1 { input: + fastqs_R1 = fastqs_R1[i], + fastqs_R2 = [ + ], + trim_bp = xcor_trim_bp, + crop_length = 0, + crop_length_tol = 0, + trimmomatic_phred_score_format = trimmomatic_phred_score_format, + + aligner = aligner_, + mito_chr_name = mito_chr_name_, + custom_align_py = custom_align_py, + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + paired_end = false, + use_bwa_mem_for_pe = false, + bwa_mem_read_len_limit = 0, + use_bowtie2_local_mode = use_bowtie2_local_mode, + ref_fa = ref_fa_, + + cpu = align_cpu, + mem_factor = align_mem_factor_, + time_hr = align_time_hr, + disk_factor = align_disk_factor_, + runtime_environment = runtime_environment, + } + # no bam deduping for xcor + call filter as filter_R1 { input: + bam = align_R1.bam, + paired_end = false, + redact_nodup_bam = false, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = true, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + call bam2ta as bam2ta_no_dedup_R1 { input: + bam = filter_R1.nodup_bam, # it's named as nodup bam but it's not deduped but just filtered + paired_end = false, + subsample = 0, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + + # special trimming/mapping for xcor (when starting from BAMs) + Boolean has_input_of_bam2ta_no_dedup = (has_output_of_align || defined(align.bam)) && !defined(bam2ta_no_dedup_R1.ta) + if (has_input_of_bam2ta_no_dedup) { + call filter as filter_no_dedup { input: + bam = bam_, + paired_end = paired_end_, + redact_nodup_bam = false, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = true, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + call bam2ta as bam2ta_no_dedup { input: + bam = filter_no_dedup.nodup_bam, # output name is 
nodup but it's not deduped + paired_end = paired_end_, + subsample = 0, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + + # use trimmed/unfiltered R1 tagAlign for paired end dataset + # if not starting from fastqs, keep using old method + # (mapping with both ends for tag-aligns to be used for xcor) + # subsample tagalign (non-mito) and cross-correlation analysis + File? ta_xcor = if defined(bam2ta_no_dedup_R1.ta) then bam2ta_no_dedup_R1.ta else if defined(bam2ta_no_dedup.ta) then bam2ta_no_dedup.ta else ta_ + Boolean paired_end_xcor = if defined(bam2ta_no_dedup_R1.ta) then false else paired_end_ + + Boolean has_input_of_xcor = defined(ta_xcor) + if (has_input_of_xcor && enable_xcor_) { + call xcor { input: + ta = ta_xcor, + paired_end = paired_end_xcor, + subsample = xcor_subsample_reads, + mito_chr_name = mito_chr_name_, + chip_seq_type = pipeline_type, + exclusion_range_min = xcor_exclusion_range_min, + exclusion_range_max = xcor_exclusion_range_max, + cpu = xcor_cpu, + mem_factor = xcor_mem_factor, + time_hr = xcor_time_hr, + disk_factor = xcor_disk_factor, + runtime_environment = runtime_environment_spp, + } + } + + # before peak calling, get fragment length from xcor analysis or given input + # if fraglen [] is defined in the input JSON, fraglen from xcor will be ignored + Int? fraglen_ = if i < length(fraglen) then fraglen[i] else xcor.fraglen + } + + # align each control + scatter (i in range(num_ctl)) { + # to override endedness definition for individual control + # ctl_paired_end will override ctl_paired_ends[i] + Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i < length(ctl_paired_ends) then ctl_paired_ends[i] else select_first([ + ctl_paired_end, + paired_end, + ]) + + Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i]) > 0 + Boolean has_output_of_align_ctl = i < length(ctl_bams) + if (has_input_of_align_ctl && !has_output_of_align_ctl) { + call align as align_ctl { input: + fastqs_R1 = ctl_fastqs_R1[i], + fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [ + ], + crop_length = crop_length, + crop_length_tol = crop_length_tol, + trimmomatic_phred_score_format = trimmomatic_phred_score_format, + + aligner = aligner_, + mito_chr_name = mito_chr_name_, + custom_align_py = custom_align_py, + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + paired_end = ctl_paired_end_, + use_bwa_mem_for_pe = use_bwa_mem_for_pe, + bwa_mem_read_len_limit = bwa_mem_read_len_limit, + use_bowtie2_local_mode = use_bowtie2_local_mode, + ref_fa = ref_fa_, + + trimmomatic_java_heap = align_trimmomatic_java_heap, + cpu = align_cpu, + mem_factor = align_mem_factor_, + time_hr = align_time_hr, + disk_factor = align_disk_factor_, + runtime_environment = runtime_environment, + } + }
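Each step in the replicate and control scatters follows the same resume pattern: run a task only when its input exists and its output was not already supplied in the input JSON, then select whichever is available. A generic, hypothetical sketch of that pattern (the task and names here are illustrative only):

    version 1.0

    task step {
        input {
            File x
        }
        command <<<
            cp ~{x} out.txt
        >>>
        output {
            File out = "out.txt"
        }
    }

    workflow resume_pattern {
        input {
            Array[File] precomputed = []
            File? upstream_out
            Int i = 0
        }
        Boolean has_input = defined(upstream_out)
        Boolean has_output = i < length(precomputed)
        # skip the step if its output was already given in the input JSON
        if (has_input && !has_output) {
            call step { input: x = select_first([upstream_out]) }
        }
        File? out_ = if has_output then precomputed[i] else step.out
        output {
            File? result = out_
        }
    }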
+ File? ctl_bam_ = if has_output_of_align_ctl then ctl_bams[i] else align_ctl.bam + + Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam) + Boolean has_output_of_filter_ctl = i < length(ctl_nodup_bams) + # skip if we already have output of this step + if (has_input_of_filter_ctl && !has_output_of_filter_ctl) { + call filter as filter_ctl { input: + bam = ctl_bam_, + paired_end = ctl_paired_end_, + ref_fa = ref_fa_, + redact_nodup_bam = redact_nodup_bam, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = no_dup_removal, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + } + File? ctl_nodup_bam_ = if has_output_of_filter_ctl then ctl_nodup_bams[i] else filter_ctl.nodup_bam + + Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam) + Boolean has_output_of_bam2ta_ctl = i < length(ctl_tas) + if (has_input_of_bam2ta_ctl && !has_output_of_bam2ta_ctl) { + call bam2ta as bam2ta_ctl { input: + bam = ctl_nodup_bam_, + subsample = ctl_subsample_reads, + paired_end = ctl_paired_end_, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + File? ctl_ta_ = if has_output_of_bam2ta_ctl then ctl_tas[i] else bam2ta_ctl.ta + } + + # if there are TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta = length(select_all(ta_)) == num_rep + if (has_all_inputs_of_pool_ta && num_rep > 1) { + # pool tagaligns from true replicates + call pool_ta { input: + tas = ta_, + prefix = "rep", + runtime_environment = runtime_environment, + } + } + + # if there are pr1 TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1)) == num_rep + if (has_all_inputs_of_pool_ta_pr1 && num_rep > 1 && !align_only_ && !true_rep_only) { + # pool tagaligns from pseudo replicate 1 + call pool_ta as pool_ta_pr1 { input: + tas = spr.ta_pr1, + prefix = "rep-pr1", + runtime_environment = runtime_environment, + } + } + + # if there are pr2 TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2)) == num_rep + if (has_all_inputs_of_pool_ta_pr2 && num_rep > 1 && !align_only_ && !true_rep_only) { + # pool tagaligns from pseudo replicate 2 + call pool_ta as pool_ta_pr2 { input: + tas = spr.ta_pr2, + prefix = "rep-pr2", + runtime_environment = runtime_environment, + } + } + + # if there are CTL TAs for ALL replicates then pool them + Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_)) == num_ctl + if (has_all_inputs_of_pool_ta_ctl && num_ctl > 1) { + # pool tagaligns from true replicates + call pool_ta as pool_ta_ctl { input: + tas = ctl_ta_, + prefix = "ctl", + runtime_environment = runtime_environment, + } + } + + Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled) + if (has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep > 1) { + call count_signal_track as count_signal_track_pooled { input: + ta = pool_ta.ta_pooled, + chrsz = chrsz_, + runtime_environment = runtime_environment, + } + } + + Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_)) == num_rep +
if (has_input_of_jsd && num_rep > 0 && enable_jsd_) { + # fingerprint and JS-distance plot + call jsd { input: + nodup_bams = nodup_bam_, + ctl_bams = ctl_nodup_bam_, # use first control only + blacklist = blacklist_, + mapq_thresh = mapq_thresh_, + + cpu = jsd_cpu, + mem_factor = jsd_mem_factor, + time_hr = jsd_time_hr, + disk_factor = jsd_disk_factor, + runtime_environment = runtime_environment, + } + } + + Boolean has_all_input_of_choose_ctl = length(select_all(ta_)) == num_rep && length(select_all(ctl_ta_)) == num_ctl && num_ctl > 0 + if (has_all_input_of_choose_ctl && !align_only_) { + # choose appropriate control for each exp IP replicate + # outputs: + # choose_ctl.idx : control replicate index for each exp replicate + # -1 means pooled ctl replicate + call choose_ctl { input: + tas = ta_, + ctl_tas = ctl_ta_, + ta_pooled = pool_ta.ta_pooled, + ctl_ta_pooled = pool_ta_ctl.ta_pooled, + always_use_pooled_ctl = always_use_pooled_ctl, + ctl_depth_ratio = ctl_depth_ratio, + ctl_depth_limit = ctl_depth_limit, + exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit, + runtime_environment = runtime_environment, + } + } + + scatter (i in range(num_rep)) { + # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]] + # chosen_ctl_ta_id + # >=0: control TA index (this means that control TA with this index exists) + # -1: use pooled control + # -2: there is no control + Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then select_first([ + choose_ctl.chosen_ctl_ta_ids, + ])[i] else -2 + Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first([ + choose_ctl.chosen_ctl_ta_subsample, + ])[i] else 0 + Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] else ctl_paired_end_[chosen_ctl_ta_id] + + if (chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0) { + call subsample_ctl { input: + ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled else ctl_ta_[chosen_ctl_ta_id], + subsample = chosen_ctl_ta_subsample, + paired_end = chosen_ctl_paired_end, + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment, + } + } + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [ + ] else if chosen_ctl_ta_subsample > 0 then [ + select_first([ + subsample_ctl.ta_subsampled, + ]), + ] else if chosen_ctl_ta_id == -1 then [ + select_first([ + pool_ta_ctl.ta_pooled, + ]), + ] else [ + select_first([ + ctl_ta_[chosen_ctl_ta_id], + ]), + ] + } + Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first([ + choose_ctl.chosen_ctl_ta_subsample_pooled, + ]) else 0 + + # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) + Array[Int] fraglen_tmp = select_all(fraglen_) + + # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks + scatter (i in range(num_rep)) { + Boolean has_input_of_call_peak = defined(ta_[i]) + Boolean has_output_of_call_peak = i < length(peaks) + if (has_input_of_call_peak && !has_output_of_call_peak && !align_only_) { + call call_peak { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + ta_[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = 
regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? peak_ = if has_output_of_call_peak then peaks[i] else call_peak.peak + + # signal track + if (has_input_of_call_peak && !align_only_) { + call macs2_signal_track { input: + tas = flatten([ + [ + ta_[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_tmp[i], + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2, + } + } + + # call peaks on 1st pseudo replicated tagalign + Boolean has_input_of_call_peak_pr1 = defined(spr.ta_pr1[i]) + Boolean has_output_of_call_peak_pr1 = i < length(peaks_pr1) + if (has_input_of_call_peak_pr1 && !has_output_of_call_peak_pr1 && !true_rep_only) { + call call_peak as call_peak_pr1 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + spr.ta_pr1[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? peak_pr1_ = if has_output_of_call_peak_pr1 then peaks_pr1[i] else call_peak_pr1.peak + + # call peaks on 2nd pseudo replicated tagalign + Boolean has_input_of_call_peak_pr2 = defined(spr.ta_pr2[i]) + Boolean has_output_of_call_peak_pr2 = i < length(peaks_pr2) + if (has_input_of_call_peak_pr2 && !has_output_of_call_peak_pr2 && !true_rep_only) { + call call_peak as call_peak_pr2 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + spr.ta_pr2[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? 
peak_pr2_ = if has_output_of_call_peak_pr2 then peaks_pr2[i] else call_peak_pr2.peak + } + + # if ( !align_only_ && num_rep > 1 ) { + # rounded mean of fragment length, which will be used for + # 1) calling peaks for pooled true/pseudo replicates + # 2) calculating FRiP + call rounded_mean as fraglen_mean { input: + ints = fraglen_tmp, + runtime_environment = runtime_environment, + } + # } + + if (has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0) { + call subsample_ctl as subsample_ctl_pooled { input: + ta = if num_ctl < 2 then ctl_ta_[0] else pool_ta_ctl.ta_pooled, + subsample = chosen_ctl_ta_pooled_subsample, + paired_end = ctl_paired_end_[0], + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment, + } + } + # actually not an array + Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [ + ] else if chosen_ctl_ta_pooled_subsample > 0 then [ + subsample_ctl_pooled.ta_subsampled, + ] else if num_ctl < 2 then [ + ctl_ta_[0], + ] else [ + pool_ta_ctl.ta_pooled, + ] + + Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) + Boolean has_output_of_call_peak_pooled = defined(peak_pooled) + if (has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep > 1) { + # call peaks on pooled replicate + # always call peaks for pooled replicate to get signal tracks + call call_peak as call_peak_pooled { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? 
peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled else call_peak_pooled.peak + + # macs2 signal track for pooled rep + if (has_input_of_call_peak_pooled && !align_only_ && num_rep > 1) { + call macs2_signal_track as macs2_signal_track_pooled { input: + tas = flatten([ + select_all([ + pool_ta.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_mean.rounded_mean, + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2, + } + } + + Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) + Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) + if (has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep > 1) { + # call peaks on 1st pooled pseudo replicates + call call_peak as call_peak_ppr1 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta_pr1.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 else call_peak_ppr1.peak + + Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) + Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) + if (has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep > 1) { + # call peaks on 2nd pooled pseudo replicates + call call_peak as call_peak_ppr2 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta_pr2.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + } + } + File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 else call_peak_ppr2.peak + + # do IDR/overlap on all pairs of two replicates (i,j) + # where i and j are zero-based indices and 0 <= i < j < num_rep + scatter (pair in cross(range(num_rep), range(num_rep))) { + # pair.left = 0-based index of 1st replicate + # pair.right = 0-based index of 2nd replicate + File? peak1_ = peak_[pair.left] + File? 
peak2_ = peak_[pair.right] + if (!align_only_ && pair.left < pair.right) { + # Naive overlap on every pair of true replicates + call overlap { input: + prefix = "rep" + (pair.left + 1) + "_vs_rep" + (pair.right + 1), + peak1 = peak1_, + peak2 = peak2_, + peak_pooled = peak_pooled_, + fraglen = fraglen_mean.rounded_mean, + peak_type = peak_type_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment, + } + } + if (enable_idr && !align_only_ && pair.left < pair.right) { + # IDR on every pair of true replicates + call idr { input: + prefix = "rep" + (pair.left + 1) + "_vs_rep" + (pair.right + 1), + peak1 = peak1_, + peak2 = peak2_, + peak_pooled = peak_pooled_, + fraglen = fraglen_mean.rounded_mean, + idr_thresh = idr_thresh, + peak_type = peak_type_, + rank = idr_rank_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment, + } + } + } + + # overlap on pseudo-replicates (pr1, pr2) for each true replicate + if (!align_only_ && !true_rep_only) { + scatter (i in range(num_rep)) { + call overlap as overlap_pr { input: + prefix = "rep" + (i + 1) + "-pr1_vs_rep" + (i + 1) + "-pr2", + peak1 = peak_pr1_[i], + peak2 = peak_pr2_[i], + peak_pooled = peak_[i], + fraglen = fraglen_[i], + peak_type = peak_type_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = ta_[i], + runtime_environment = runtime_environment, + } + } + } + + if (!align_only_ && !true_rep_only && enable_idr) { + scatter (i in range(num_rep)) { + # IDR on pseudo replicates + call idr as idr_pr { input: + prefix = "rep" + (i + 1) + "-pr1_vs_rep" + (i + 1) + "-pr2", + peak1 = peak_pr1_[i], + peak2 = peak_pr2_[i], + peak_pooled = peak_[i], + fraglen = fraglen_[i], + idr_thresh = idr_thresh, + peak_type = peak_type_, + rank = idr_rank_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = ta_[i], + runtime_environment = runtime_environment, + } + } + } + + if (!align_only_ && !true_rep_only && num_rep > 1) { + # Naive overlap on pooled pseudo replicates + call overlap as overlap_ppr { input: + prefix = "pooled-pr1_vs_pooled-pr2", + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment, + } + } + + if (!align_only_ && !true_rep_only && num_rep > 1 && enable_idr) { + # IDR on pooled pseudo replicates + call idr as idr_ppr { input: + prefix = "pooled-pr1_vs_pooled-pr2", + peak1 = peak_ppr1_, + peak2 = peak_ppr2_, + peak_pooled = peak_pooled_, + idr_thresh = idr_thresh, + peak_type = peak_type_, + fraglen = fraglen_mean.rounded_mean, + rank = idr_rank_, + blacklist = blacklist_, + chrsz = chrsz_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + ta = pool_ta.ta_pooled, + runtime_environment = runtime_environment, + } + }
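The pairwise overlap/IDR scatter above enumerates unordered replicate pairs by crossing a range with itself and keeping only pair.left < pair.right. A stripped-down sketch of that idiom:

    version 1.0

    workflow replicate_pairs {
        input {
            Int num_rep = 3
        }
        # cross() yields all ordered pairs; the guard keeps each unordered pair once
        scatter (pair in cross(range(num_rep), range(num_rep))) {
            if (pair.left < pair.right) {
                String label = "rep" + (pair.left + 1) + "_vs_rep" + (pair.right + 1)
            }
        }
        output {
            Array[String] labels = select_all(label)
        }
    }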
defined(overlap_pr.bfilt_overlap_peak) then select_first([ + overlap_pr.bfilt_overlap_peak, + ]) else [ + ], + peak_ppr = overlap_ppr.bfilt_overlap_peak, + peak_type = peak_type_, + chrsz = chrsz_, + runtime_environment = runtime_environment, + } + } + + if (!align_only_ && !true_rep_only && num_rep > 0 && enable_idr) { + # reproducibility QC for IDR peaks + call reproducibility as reproducibility_idr { input: + prefix = "idr", + peaks = select_all(idr.bfilt_idr_peak), + peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([ + idr_pr.bfilt_idr_peak, + ]) else [ + ], + peak_ppr = idr_ppr.bfilt_idr_peak, + peak_type = peak_type_, + chrsz = chrsz_, + runtime_environment = runtime_environment, + } + } + + # Generate final QC report and JSON + call qc_report { input: + pipeline_ver = pipeline_ver, + title = title, + description = description, + genome = genome_name_, + paired_ends = paired_end_, + ctl_paired_ends = ctl_paired_end_, + pipeline_type = pipeline_type, + aligner = aligner_, + no_dup_removal = no_dup_removal, + peak_caller = peak_caller_, + cap_num_peak = cap_num_peak_, + idr_thresh = idr_thresh, + pval_thresh = pval_thresh, + xcor_trim_bp = xcor_trim_bp, + xcor_subsample_reads = xcor_subsample_reads, + + samstat_qcs = select_all(align.samstat_qc), + nodup_samstat_qcs = select_all(filter.samstat_qc), + dup_qcs = select_all(filter.dup_qc), + lib_complexity_qcs = select_all(filter.lib_complexity_qc), + xcor_plots = select_all(xcor.plot_png), + xcor_scores = select_all(xcor.score), + + ctl_samstat_qcs = select_all(align_ctl.samstat_qc), + ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc), + ctl_dup_qcs = select_all(filter_ctl.dup_qc), + ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), + + jsd_plot = jsd.plot, + jsd_qcs = if defined(jsd.jsd_qcs) then select_first([ + jsd.jsd_qcs, + ]) else [ + ], + + frip_qcs = select_all(call_peak.frip_qc), + frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), + frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc), + frip_qc_pooled = call_peak_pooled.frip_qc, + frip_qc_ppr1 = call_peak_ppr1.frip_qc, + frip_qc_ppr2 = call_peak_ppr2.frip_qc, + + idr_plots = select_all(idr.idr_plot), + idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([ + idr_pr.idr_plot, + ]) else [ + ], + idr_plot_ppr = idr_ppr.idr_plot, + frip_idr_qcs = select_all(idr.frip_qc), + frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([ + idr_pr.frip_qc, + ]) else [ + ], + frip_idr_qc_ppr = idr_ppr.frip_qc, + frip_overlap_qcs = select_all(overlap.frip_qc), + frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([ + overlap_pr.frip_qc, + ]) else [ + ], + frip_overlap_qc_ppr = overlap_ppr.frip_qc, + idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, + overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, + + gc_plots = select_all(gc_bias.gc_plot), + + peak_region_size_qcs = select_all(call_peak.peak_region_size_qc), + peak_region_size_plots = select_all(call_peak.peak_region_size_plot), + num_peak_qcs = select_all(call_peak.num_peak_qc), + + idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc, + idr_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc, + + overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc, + overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, + overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc, + + 
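        # A note on the recurring shape above: `if defined(x) then
        # select_first([x]) else []` coalesces an `Array[File]?` produced
        # inside a conditional block into a concrete `Array[File]`.
        # `select_first` unwraps the optional when the enclosing `if` or
        # `scatter` actually ran, and the empty-array literal stands in when
        # it did not, so `qc_report` always receives a real array.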
runtime_environment = runtime_environment, + } + + output { + File report = qc_report.report + File qc_json = qc_report.qc_json + Boolean qc_json_ref_match = qc_report.qc_json_ref_match + } +} + +task align { + input { + Array[File] fastqs_R1 # [merge_id] + Array[File] fastqs_R2 + File? ref_fa + Int? trim_bp # this is for R1 only + Int crop_length + Int crop_length_tol + String? trimmomatic_phred_score_format + + String aligner + + String mito_chr_name + Int? multimapping + File? custom_align_py + File? idx_tar # reference index tar + Boolean paired_end + Boolean use_bwa_mem_for_pe + Int bwa_mem_read_len_limit + Boolean use_bowtie2_local_mode + + String? trimmomatic_java_heap + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") + Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) + + Float trimmomatic_java_heap_factor = 0.9 + Array[Array[File]] tmp_fastqs = if paired_end then transpose([ + fastqs_R1, + fastqs_R2, + ]) else transpose([ + fastqs_R1, + ]) + + command <<< + + set -e + + # check if pipeline dependencies can be found + if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] + then + echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 + exit 3 + fi + python3 $(which encode_task_merge_fastq.py) \ + ~{write_tsv(tmp_fastqs)} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{"--nth " + cpu} + + if [ -z '~{trim_bp}' ]; then + SUFFIX= + else + SUFFIX=_trimmed + python3 $(which encode_task_trim_fastq.py) \ + R1/*.fastq.gz \ + --trim-bp ~{trim_bp} \ + --out-dir R1$SUFFIX + if [ '~{paired_end}' == 'true' ]; then + python3 $(which encode_task_trim_fastq.py) \ + R2/*.fastq.gz \ + --trim-bp ~{trim_bp} \ + --out-dir R2$SUFFIX + fi + fi + if [ '~{crop_length}' == '0' ]; then + SUFFIX=$SUFFIX + else + NEW_SUFFIX="$SUFFIX"_cropped + python3 $(which encode_task_trimmomatic.py) \ + --fastq1 R1$SUFFIX/*.fastq.gz \ + ~{if paired_end then "--fastq2 R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ + --crop-length ~{crop_length} \ + --crop-length-tol "~{crop_length_tol}" \ + ~{"--phred-score-format " + trimmomatic_phred_score_format} \ + --out-dir-R1 R1$NEW_SUFFIX \ + ~{if paired_end then "--out-dir-R2 R2$NEW_SUFFIX" else ""} \ + ~{"--trimmomatic-java-heap " + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + "G")} \ + ~{"--nth " + cpu} + SUFFIX=$NEW_SUFFIX + fi + + if [ '~{aligner}' == 'bwa' ]; then + python3 $(which encode_task_bwa.py) \ + ~{idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{if use_bwa_mem_for_pe then "--use-bwa-mem-for-pe" else ""} \ + ~{"--bwa-mem-read-len-limit " + bwa_mem_read_len_limit} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + + elif [ '~{aligner}' == 'bowtie2' ]; then + python3 $(which encode_task_bowtie2.py) \ + ~{idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{"--multimapping " + multimapping} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{if use_bowtie2_local_mode then "--local" else ""} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + else + python3 ~{custom_align_py} \ + ~{idx_tar} \ + 
R1$SUFFIX/*.fastq.gz \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + fi + + python3 $(which encode_task_post_align.py) \ + R1$SUFFIX/*.fastq.gz $(ls *.bam) \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + rm -rf R1 R2 R1$SUFFIX R2$SUFFIX + + >>> + + output { + File bam = glob("*.bam")[0] + File bai = glob("*.bai")[0] + File samstat_qc = glob("*.samstats.qc")[0] + File read_len_log = glob("*.read_length.txt")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_filter.py) \ + ~{bam} \ + ~{if paired_end then "--paired-end" else ""} \ + --multimapping 0 \ + ~{"--dup-marker " + dup_marker} \ + ~{"--mapq-thresh " + mapq_thresh} \ + --filter-chrs ~{sep=" " filter_chrs} \ + ~{"--chrsz " + chrsz} \ + ~{if no_dup_removal then "--no-dup-removal" else ""} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} \ + ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G")} + + if [ '~{redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ~{"--ref-fa " + ref_fa} \ + '--delete-original-bam' + fi + + >>> + + output { + File nodup_bam = glob("*.bam")[0] + File nodup_bai = glob("*.bai")[0] + File samstat_qc = glob("*.samstats.qc")[0] + File dup_qc = glob("*.dup.qc")[0] + File lib_complexity_qc = glob("*.lib_complexity.qc")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task bam2ta { + input { + File? 
bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_bam2ta.py) \ + ~{bam} \ + --disable-tn5-shift \ + ~{if paired_end then "--paired-end" else ""} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--subsample " + subsample} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + + >>> + + output { + File ta = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task spr { + input { + File? ta + Boolean paired_end + Int pseudoreplication_random_seed + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_spr.py) \ + ~{ta} \ + ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ + ~{if paired_end then "--paired-end" else ""} + + >>> + + output { + File ta_pr1 = glob("*.pr1.tagAlign.gz")[0] + File ta_pr2 = glob("*.pr2.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task pool_ta { + input { + Array[File?] tas + Int? col # number of columns in pooled TA + String? prefix # basename prefix + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + python3 $(which encode_task_pool_ta.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--prefix " + prefix} \ + ~{"--col " + col} + + >>> + + output { + File ta_pooled = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "8 GB" + time: 4 + disks: "local-disk 100 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task xcor { + input { + File? ta + Boolean paired_end + String mito_chr_name + Int subsample # number of reads to subsample TAGALIGN + # this will be used for xcor only + # will not affect any downstream analysis + String? chip_seq_type + Int? exclusion_range_min + Int? 
exclusion_range_max + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 8.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_xcor.py) \ + ~{ta} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--subsample " + subsample} \ + ~{"--chip-seq-type " + chip_seq_type} \ + ~{"--exclusion-range-min " + exclusion_range_min} \ + ~{"--exclusion-range-max " + exclusion_range_max} \ + ~{"--subsample " + subsample} \ + ~{"--nth " + cpu} + + >>> + + output { + File plot_pdf = glob("*.cc.plot.pdf")[0] + File plot_png = glob("*.cc.plot.png")[0] + File score = glob("*.cc.qc")[0] + File fraglen_log = glob("*.cc.fraglen.txt")[0] + Int fraglen = read_int(fraglen_log) + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task jsd { + input { + Array[File?] nodup_bams + Array[File?] ctl_bams + File? blacklist + Int mapq_thresh + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_jsd.py) \ + ~{sep=" " select_all(nodup_bams)} \ + ~{if length(ctl_bams) > 0 then "--ctl-bam " + select_first(ctl_bams) else ""} \ + ~{"--mapq-thresh " + mapq_thresh} \ + ~{"--blacklist " + blacklist} \ + ~{"--nth " + cpu} + + >>> + + output { + File plot = glob("*.png")[0] + Array[File] jsd_qcs = glob("*.jsd.qc") + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. 
+ Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ~{sep=" " select_all(tas)} \ + --ctl-tas ~{sep=" " select_all(ctl_tas)} \ + ~{"--ta-pooled " + ta_pooled} \ + ~{"--ctl-ta-pooled " + ctl_ta_pooled} \ + ~{if always_use_pooled_ctl then "--always-use-pooled-ctl" else ""} \ + ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ + ~{"--ctl-depth-limit " + ctl_depth_limit} \ + ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} + + >>> + + output { + File chosen_ctl_id_tsv = glob("chosen_ctl.tsv")[0] + File chosen_ctl_subsample_tsv = glob("chosen_ctl_subsample.tsv")[0] + File chosen_ctl_subsample_pooled_txt = glob("chosen_ctl_subsample_pooled.txt")[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + Float mem_gb = 8.0 + + command <<< + + set -e + python3 $(which encode_task_count_signal_track.py) \ + ~{ta} \ + ~{"--chrsz " + chrsz} \ + ~{"--mem-gb " + mem_gb} + + >>> + + output { + File pos_bw = glob("*.positive.bigwig")[0] + File neg_bw = glob("*.negative.bigwig")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? ta + Boolean paired_end + Int subsample + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + python3 $(which encode_task_subsample_ctl.py) \ + ~{ta} \ + ~{"--subsample " + subsample} \ + ~{if paired_end then "--paired-end" else ""} \ + + >>> + + output { + File ta_subsampled = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk ~{disk_gb} SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + + File? blacklist # blacklist BED to filter raw peaks + String? 
regex_bfilt_peak_chr_name + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + + if [ '~{peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--gensz " + gensz} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--cap-num-peak " + cap_num_peak} \ + ~{"--pval-thresh " + pval_thresh} \ + ~{"--mem-gb " + mem_gb} + + elif [ '~{peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--cap-num-peak " + cap_num_peak} \ + ~{"--fdr-thresh " + fdr_thresh} \ + ~{"--nth " + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ~{"--ta " + tas[0]} \ + ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--peak-type " + peak_type} \ + ~{"--blacklist " + blacklist} + + >>> + + output { + File peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + # generated by post_call_peak py + File bfilt_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File frip_qc = glob("*.frip.qc")[0] + File peak_region_size_qc = glob("*.peak_region_size.qc")[0] + File peak_region_size_plot = glob("*.peak_region_size.png")[0] + File num_peak_qc = glob("*.num_peak.qc")[0] + } + + runtime { + cpu: if peak_caller == "macs2" then 2 else cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--gensz " + gensz} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--pval-thresh " + pval_thresh} \ + ~{"--mem-gb " + mem_gb} + + >>> + + output { + File pval_bw = glob("*.pval.signal.bigwig")[0] + File fc_bw = glob("*.fc.signal.bigwig")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? 
peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + ~{if defined(ta) then "" else "touch null.frip.qc"} + touch null + python3 $(which encode_task_idr.py) \ + ~{peak1} ~{peak2} ~{peak_pooled} \ + ~{"--prefix " + prefix} \ + ~{"--idr-thresh " + idr_thresh} \ + ~{"--peak-type " + peak_type} \ + --idr-rank ~{rank} \ + ~{"--fraglen " + fraglen} \ + ~{"--chrsz " + chrsz} \ + ~{"--blacklist " + blacklist} \ + ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--ta " + ta} + + >>> + + output { + File idr_peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + File bfilt_idr_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_idr_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_idr_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_idr_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_idr_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File idr_plot = glob("*.txt.png")[0] + File idr_unthresholded_peak = glob("*.txt.gz")[0] + File idr_log = glob("*.idr*.log")[0] + File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + ~{if defined(ta) then "" else "touch null.frip.qc"} + touch null + python3 $(which encode_task_overlap.py) \ + ~{peak1} ~{peak2} ~{peak_pooled} \ + ~{"--prefix " + prefix} \ + ~{"--peak-type " + peak_type} \ + ~{"--fraglen " + fraglen} \ + ~{"--chrsz " + chrsz} \ + ~{"--blacklist " + blacklist} \ + --nonamecheck \ + ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--ta " + ta} + + >>> + + output { + File overlap_peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + File bfilt_overlap_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_overlap_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_overlap_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_overlap_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. 
for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + python3 $(which encode_task_reproducibility.py) \ + ~{sep=" " peaks} \ + --peaks-pr ~{sep=" " peaks_pr} \ + ~{"--peak-ppr " + peak_ppr} \ + --prefix ~{prefix} \ + ~{"--peak-type " + peak_type} \ + ~{"--chrsz " + chrsz} + + >>> + + output { + File optimal_peak = glob("*optimal_peak.*.gz")[0] + File optimal_peak_bb = glob("*optimal_peak.*.bb")[0] + File optimal_peak_starch = glob("*optimal_peak.*.starch")[0] + File optimal_peak_hammock = glob("*optimal_peak.*.hammock.gz*")[0] + File optimal_peak_hammock_tbi = glob("*optimal_peak.*.hammock.gz*")[1] + File conservative_peak = glob("*conservative_peak.*.gz")[0] + File conservative_peak_bb = glob("*conservative_peak.*.bb")[0] + File conservative_peak_starch = glob("*conservative_peak.*.starch")[0] + File conservative_peak_hammock = glob("*conservative_peak.*.hammock.gz*")[0] + File conservative_peak_hammock_tbi = glob("*conservative_peak.*.hammock.gz*")[1] + File reproducibility_qc = glob("*reproducibility.qc")[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob("*.peak_region_size.qc")[0] + File peak_region_size_plot = glob("*.peak_region_size.png")[0] + File num_peak_qc = glob("*.num_peak.qc")[0] + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + + String? picard_java_heap + + RuntimeEnvironment runtime_environment + } + + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command <<< + + set -e + python3 $(which encode_task_gc_bias.py) \ + ~{"--nodup-bam " + nodup_bam} \ + ~{"--ref-fa " + ref_fa} \ + ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G")} + + >>> + + output { + File gc_plot = glob("*.gc_plot.png")[0] + File gc_log = glob("*.gc.txt")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 6 + disks: "local-disk 250 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? 
idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + + Array[File] gc_plots + + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + + File? qc_json_ref + + RuntimeEnvironment runtime_environment + } + + command <<< + + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ~{"--pipeline-ver " + pipeline_ver} \ + ~{"--title '" + sub(title, "'", "_") + "'"} \ + ~{"--desc '" + sub(description, "'", "_") + "'"} \ + ~{"--genome " + genome} \ + ~{"--multimapping " + 0} \ + --paired-ends ~{sep=" " paired_ends} \ + --ctl-paired-ends ~{sep=" " ctl_paired_ends} \ + --pipeline-type ~{pipeline_type} \ + --aligner ~{aligner} \ + ~{if (no_dup_removal) then "--no-dup-removal " else ""} \ + --peak-caller ~{peak_caller} \ + ~{"--cap-num-peak " + cap_num_peak} \ + --idr-thresh ~{idr_thresh} \ + --pval-thresh ~{pval_thresh} \ + --xcor-trim-bp ~{xcor_trim_bp} \ + --xcor-subsample-reads ~{xcor_subsample_reads} \ + --samstat-qcs ~{sep="_:_" samstat_qcs} \ + --nodup-samstat-qcs ~{sep="_:_" nodup_samstat_qcs} \ + --dup-qcs ~{sep="_:_" dup_qcs} \ + --lib-complexity-qcs ~{sep="_:_" lib_complexity_qcs} \ + --xcor-plots ~{sep="_:_" xcor_plots} \ + --xcor-scores ~{sep="_:_" xcor_scores} \ + --idr-plots ~{sep="_:_" idr_plots} \ + --idr-plots-pr ~{sep="_:_" idr_plots_pr} \ + --ctl-samstat-qcs ~{sep="_:_" ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ~{sep="_:_" ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ~{sep="_:_" ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ~{sep="_:_" ctl_lib_complexity_qcs} \ + ~{"--jsd-plot " + jsd_plot} \ + --jsd-qcs ~{sep="_:_" jsd_qcs} \ + ~{"--idr-plot-ppr " + idr_plot_ppr} \ + --frip-qcs ~{sep="_:_" frip_qcs} \ + --frip-qcs-pr1 ~{sep="_:_" frip_qcs_pr1} \ + --frip-qcs-pr2 ~{sep="_:_" frip_qcs_pr2} \ + ~{"--frip-qc-pooled " + frip_qc_pooled} \ + ~{"--frip-qc-ppr1 " + frip_qc_ppr1} \ + ~{"--frip-qc-ppr2 " + frip_qc_ppr2} \ + --frip-idr-qcs ~{sep="_:_" frip_idr_qcs} \ + --frip-idr-qcs-pr ~{sep="_:_" frip_idr_qcs_pr} \ + ~{"--frip-idr-qc-ppr " + frip_idr_qc_ppr} \ + --frip-overlap-qcs ~{sep="_:_" frip_overlap_qcs} \ + --frip-overlap-qcs-pr ~{sep="_:_" frip_overlap_qcs_pr} \ + ~{"--frip-overlap-qc-ppr " + frip_overlap_qc_ppr} \ + ~{"--idr-reproducibility-qc " + idr_reproducibility_qc} \ + ~{"--overlap-reproducibility-qc " + overlap_reproducibility_qc} \ + --gc-plots ~{sep="_:_" gc_plots} \ + --peak-region-size-qcs ~{sep="_:_" peak_region_size_qcs} \ + --peak-region-size-plots ~{sep="_:_" peak_region_size_plots} \ + --num-peak-qcs ~{sep="_:_" num_peak_qcs} \ + ~{"--idr-opt-peak-region-size-qc " + idr_opt_peak_region_size_qc} \ + ~{"--idr-opt-peak-region-size-plot " + idr_opt_peak_region_size_plot} \ + ~{"--idr-opt-num-peak-qc " + idr_opt_num_peak_qc} \ + ~{"--overlap-opt-peak-region-size-qc " + overlap_opt_peak_region_size_qc} \ + ~{"--overlap-opt-peak-region-size-plot " + overlap_opt_peak_region_size_plot} \ + ~{"--overlap-opt-num-peak-qc " + overlap_opt_num_peak_qc} \ + 
--out-qc-html qc.html \ + --out-qc-json qc.json \ + ~{"--qc-json-ref " + qc_json_ref} + + >>> + + output { + File report = glob("*qc.html")[0] + File qc_json = glob("*qc.json")[0] + Boolean qc_json_ref_match = read_string("qc_json_ref_match.txt") == "True" + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +### workflow system tasks +task read_genome_tsv { + input { + File? genome_tsv + String? null_s + + RuntimeEnvironment runtime_environment + } + + command <<< + + echo "$(basename ~{genome_tsv})" > genome_name + # create empty files for all entries + touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 + touch mito_chr_name + touch regex_bfilt_peak_chr_name + + python <>> + + output { + String? genome_name = read_string("genome_name") + String? ref_fa = if size("ref_fa") == 0 then null_s else read_string("ref_fa") + String? bwa_idx_tar = if size("bwa_idx_tar") == 0 then null_s else read_string("bwa_idx_tar") + String? bowtie2_idx_tar = if size("bowtie2_idx_tar") == 0 then null_s else read_string("bowtie2_idx_tar") + String? chrsz = if size("chrsz") == 0 then null_s else read_string("chrsz") + String? gensz = if size("gensz") == 0 then null_s else read_string("gensz") + String? blacklist = if size("blacklist") == 0 then null_s else read_string("blacklist") + String? blacklist2 = if size("blacklist2") == 0 then null_s else read_string("blacklist2") + String? mito_chr_name = if size("mito_chr_name") == 0 then null_s else read_string("mito_chr_name") + String? regex_bfilt_peak_chr_name = if size("regex_bfilt_peak_chr_name") == 0 then "chr[\\dXY]+" else read_string("regex_bfilt_peak_chr_name") + } + + runtime { + maxRetries: 0 + cpu: 1 + memory: "2 GB" + time: 4 + disks: "local-disk 10 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task rounded_mean { + input { + Array[Int] ints + + RuntimeEnvironment runtime_environment + } + + command <<< + + python <>> + + output { + Int rounded_mean = read_int("tmp.txt") + } + + runtime { + cpu: 1 + memory: "2 GB" + time: 4 + disks: "local-disk 10 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task raise_exception { + input { + String msg + + RuntimeEnvironment runtime_environment + } + + command <<< + + echo -e "\n* Error: ~{msg}\n" >&2 + exit 2 + + >>> + + output { + String error_msg = "~{msg}" + } + + runtime { + maxRetries: 0 + cpu: 1 + memory: "2 GB" + time: 4 + disks: "local-disk 10 SSD" + + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} diff --git a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl new file mode 100644 index 000000000..491e12a03 --- /dev/null +++ b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl @@ -0,0 +1,188 @@ +## # Header +# regular comment will be left as is +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput + +## part of preamble +version 1.2 + +#@ except: MissingMetas +struct AStruct { + String member +} + +task a_task { + meta + # Here is a comment between `meta` and the parenthesis. 
+ { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input + # Here is a comment before the input. + { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + command <<< + + >>> + + output + # Here is a comment before the output. + { + Object some_other_object = { + } + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct { + } + } + + requirements + # This is a comment before the requirements. + { + container: "ubuntu:latest" + } + + hints { + max_cpu: 1 + } +} + +## These double-pound-sign comments +## should be converted to single-pound-sign comments. +workflow hello { + meta + # Here is a comment between `meta` and the parenthesis. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the parenthesis. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] ## This should be converted to a single-pound-sign comment. + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + call a_task { + } + + scatter (name in name_array) { + call say_task { greeting = greeting } + } + + if (some_condition_task) { + call a_task as task_two { + } + } + + output + # Here is a comment before the output. + { + Object some_other_object = { + } + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. 
+ AStruct some_other_struct = AStruct { + } + } +} diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl new file mode 100644 index 000000000..705f360ba --- /dev/null +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -0,0 +1,123 @@ +version 1.0 + +workflow test_wf { + meta { + a: "hello" + b: "world" + c: 5 + d: -0xf + e: 1.0e10 + f: -2. + g: true + h: false + i: null + j: { + a: [ + 1, + 2, + 3, + ], + b: [ + "hello", + "world", + "!", + ], + c: { + x: 1, + y: 2, + z: 3, + }, + } + k: [ + { + a: { + }, + b: 0, + c: "", + d: "", + e: [ + ], + }, + { + x: [ + 1.0, + 2.0, + 3.0, + ], + }, + ] + } + + parameter_meta { + out_sj_filter_overhang_min: { + type: "SpliceJunctionMotifs", + label: "Minimum overhang required to support a splicing junction", + } + } + + input { + SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { + noncanonical_motifs: 30, + GT_AG_and_CT_AC_motif: 12, + } + } + + call no_params call with_params { input: + a, + b, + c, + d = 1, + } + call qualified.name call qualified.name { input: + a = 1, + b = 2, + c = "3", + } + call aliased as x call aliased as x { input: + } + call f after x after y call f after x after y { input: a = [ + ] } + call f as x after x call f as x after x after y { input: name = "hello" } + call test_task as foo { input: bowchicka = "wowwow" } + if (true) { + + call test_task after foo { input: bowchicka = "bowchicka" } + scatter (i in range(3)) { + call test_task as bar { input: bowchicka = i * 42 } + } + } + + output { + SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min + String a = "friend" + Int b = 1 + 2 + String c = "Hello, ~{a}" + Map[String,Int] d = { + "a": 0, + "b": 1, + "c": 2, + } + } + +} + +task test_task { + parameter_meta { + bowchicka: { + type: "String", + label: "Bowchicka", + } + } + + input { + String bowchicka + } + + command <<< + >>> +} + +struct SpliceJunctionMotifs { + Int noncanonical_motifs + Int GT_AG_and_CT_AC_motif +} diff --git a/wdl-format/tests/format/interrupt_example/source.formatted.wdl b/wdl-format/tests/format/interrupt_example/source.formatted.wdl index 4797ab7c7..492123837 100644 --- a/wdl-format/tests/format/interrupt_example/source.formatted.wdl +++ b/wdl-format/tests/format/interrupt_example/source.formatted.wdl @@ -9,5 +9,4 @@ workflow meta # interrupt { # how far should this bracket be indented? } - } diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl index 05e82bd42..cdf974454 100644 --- a/wdl-format/tests/format/seaseq-case/source.formatted.wdl +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -935,5 +935,4 @@ workflow seaseq { merge_overallsummary.summarytxt, ]) } - } From 23331da961f7f4ca357750ca30defa162789e17d Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Thu, 10 Oct 2024 11:43:15 -0400 Subject: [PATCH 17/60] fix: escape quotes when writing strings --- wdl-format/src/v1/expr.rs | 45 ++++++++++++++++- .../source.formatted.wdl | 50 +++++++++---------- .../clays_complex_script/source.formatted.wdl | 8 +-- 3 files changed, 73 insertions(+), 30 deletions(-) diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index 32a5f5057..e2a69beb6 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -65,9 +65,52 @@ pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream
<PreToken>)
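    // The loop below rewrites literal string text for double-quoted output:
    // a `\'` sequence drops its backslash (an apostrophe needs no escape
    // between double quotes), while a bare `"` whose predecessor is not a
    // backslash gains one. A rough sketch of the intent, using a
    // hypothetical helper name that is not part of this commit:
    //   normalize(r#"can\'t say "hi""#) == r#"can't say \"hi\""#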
{ + SyntaxKind::DoubleQuote => { + (&child).write(stream); + } + SyntaxKind::LiteralStringText => { + let mut replacement = String::new(); + let syntax = child.element().syntax(); + let mut chars = syntax.as_token().expect("token").text().chars().peekable(); + let mut prev_c = None; + while let Some(c) = chars.next() { + match c { + '\\' => { + if let Some(next_c) = chars.peek() { + if *next_c == '\'' { + // Do not write this backslash + prev_c = Some(c); + continue; + } + } + replacement.push(c); + } + '"' => { + if let Some(pc) = prev_c { + if pc != '\\' { + replacement.push('\\'); + } + } + replacement.push(c); + } + _ => { + replacement.push(c); + } + } + prev_c = Some(c); + } + + stream.push_literal_in_place_of_token( + child.element().as_token().expect("token"), + replacement, + ); + } + SyntaxKind::PlaceholderNode => { (&child).write(stream); } + _ => { + unreachable!("unexpected child in literal string: {:?}", child.element().kind()); + } } } } diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl index c781b9178..adb1a3064 100644 --- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -39,12 +39,12 @@ workflow chip { input_genomic_data: { title: "Input genomic data", description: "Genomic input files for experiment.", - help: "Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].", + help: "Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: [\"rep1.bam\", \"rep1.bam\"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].", }, input_genomic_data_control: { title: "Input genomic data (control)", description: "Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.", - help: "Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. 
If none of these are defined, pipeline will use chip.paired_end for controls.", + help: "Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: [\"ctl1.bam\", \"ctl1.bam\"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.", }, pipeline_parameter: { title: "Pipeline parameter", @@ -124,7 +124,7 @@ workflow chip { group: "reference_genome", } custom_aligner_idx_tar: { - description: "Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.", + description: "Index TAR file for a custom aligner. To use a custom aligner, define \"chip.custom_align_py\" too.", group: "reference_genome", } chrsz: { @@ -144,7 +144,7 @@ workflow chip { mito_chr_name: { description: "Mitochondrial chromosome name.", group: "reference_genome", - help: "e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.", + help: "e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in \"filter\" task.", } regex_bfilt_peak_chr_name: { description: "Reg-ex for chromosomes to keep while filtering peaks.", @@ -152,7 +152,7 @@ workflow chip { help: "Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.", } gensz: { - description: "Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd columnin chromosome sizes file.", + description: "Genome sizes. \"hs\" for human, \"mm\" for mouse or sum of 2nd columnin chromosome sizes file.", group: "reference_genome", } paired_end: { @@ -316,12 +316,12 @@ workflow chip { peak_ppr1: { description: "NARROWPEAK file for pooled pseudo replicate 1.", group: "input_genomic_data", - help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate's 1st pseudos.", } peak_ppr2: { description: "NARROWPEAK file for pooled pseudo replicate 2.", group: "input_genomic_data", - help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 2nd pseudo replicates. 
Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate's 2nd pseudos.", } ctl_paired_end: { @@ -465,7 +465,7 @@ workflow chip { pipeline_type: { description: "Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.", group: "pipeline_parameter", - help: "Default peak caller is different for each type. spp For TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.", + help: "Default peak caller is different for each type. spp For TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn't. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.", choices: [ "tf", "histone", @@ -515,7 +515,7 @@ workflow chip { custom_align_py: { description: "Python script for a custom aligner.", group: "alignment", - help: "There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".", + help: "There is a template included in the documentation for inputs. Defining this parameter will automatically change \"chip.aligner\" to \"custom\". You should also define \"chip.custom_aligner_idx_tar\".", } use_bwa_mem_for_pe: { description: "For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.", @@ -528,12 +528,12 @@ workflow chip { help: "If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.", } use_bowtie2_local_mode: { - description: "Use bowtie2\'s local mode (soft-clipping).", + description: "Use bowtie2's local mode (soft-clipping).", group: "alignment", help: "This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.", } crop_length: { - description: "Crop FASTQs\' reads longer than this length.", + description: "Crop FASTQs' reads longer than this length.", group: "alignment", help: "Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.", } @@ -550,7 +550,7 @@ workflow chip { "phred33", "phred64", ], - help: "This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detect it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".", + help: "This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detect it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. 
Use this if you see an error like \"Error: Unable to detect quality encoding\".", } xcor_trim_bp: { description: "Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.", @@ -615,17 +615,17 @@ workflow chip { pseudoreplication_random_seed: { description: "Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then split it into two).", group: "alignment", - help: "Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.", + help: "Pseudo-replication (task spr) is done by using GNU \"shuf --random-source=sha256(random_seed)\". If this parameter == 0, then pipeline uses input TAG-ALIGN file's size (in bytes) for the random_seed.", } ctl_depth_limit: { - description: "Hard limit for chosen control\'s depth.", + description: "Hard limit for chosen control's depth.", group: "peak_calling", help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.", } exp_ctl_depth_ratio_limit: { - description: "Second limit for chosen control\'s depth.", + description: "Second limit for chosen control's depth.", group: "peak_calling", - help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate\'s read depth multiplied by this factor then such control is subsampled down to maximum of multiplied value and hard limit chip.ctl_depth_limit.", + help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate's read depth multiplied by this factor then such control is subsampled down to maximum of multiplied value and hard limit chip.ctl_depth_limit.", } fraglen: { description: "Fragment length for each biological replicate.", @@ -847,17 +847,17 @@ workflow chip { align_trimmomatic_java_heap: { description: "Maximum Java heap (java -Xmx) in task align.", group: "resource_parameter", - help: "Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.", + help: "Maximum memory for Trimmomatic. If not defined, 90% of align task's memory will be used.", } filter_picard_java_heap: { description: "Maximum Java heap (java -Xmx) in task filter.", group: "resource_parameter", - help: "Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.", + help: "Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task's memory will be used.", } gc_bias_picard_java_heap: { description: "Maximum Java heap (java -Xmx) in task gc_bias.", group: "resource_parameter", - help: "Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.", + help: "Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task's memory will be used.", } } @@ -1380,13 +1380,13 @@ workflow chip { # sanity check for inputs if (num_rep == 0 && num_ctl == 0) { call raise_exception as error_input_data { input: - msg = "No FASTQ/BAM/TAG-ALIGN/PEAK defined in your input JSON. Check if your FASTQs are defined as "chip.fastqs_repX_RY". DO NOT MISS suffix _R1 even for single ended FASTQ.", + msg = "No FASTQ/BAM/TAG-ALIGN/PEAK defined in your input JSON. Check if your FASTQs are defined as \"chip.fastqs_repX_RY\". 
DO NOT MISS suffix _R1 even for single ended FASTQ.", runtime_environment = runtime_environment, } } if (!align_only_ && peak_caller_ == "spp" && num_ctl == 0) { call raise_exception as error_control_required { input: - msg = "SPP requires control inputs. Define control input files ("chip.ctl_*") in an input JSON file.", + msg = "SPP requires control inputs. Define control input files (\"chip.ctl_*\") in an input JSON file.", runtime_environment = runtime_environment, } } @@ -1417,7 +1417,7 @@ workflow chip { if ((ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0) && num_ctl > 1 && length(ctl_paired_ends) > 1) { call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input: - msg = "Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_limit">0) for " + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). " + "Automatic control subsampling is enabled by default. " + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. " + "You can still use manual control subsamping ("chip.ctl_subsample_reads">0) since it is done " + "for individual control\'s TAG-ALIGN output according to each control\'s endedness. ", + msg = "Cannot use automatic control subsampling (\"chip.ctl_depth_limit\">0 and \"chip.exp_ctl_depth_limit\">0) for " + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). " + "Automatic control subsampling is enabled by default. " + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. " + "You can still use manual control subsamping (\"chip.ctl_subsample_reads\">0) since it is done " + "for individual control's TAG-ALIGN output according to each control's endedness. ", runtime_environment = runtime_environment, } } @@ -3040,7 +3040,7 @@ task call_peak { python3 $(which encode_task_post_call_peak_chip.py) \ $(ls *Peak.gz) \ ~{"--ta " + tas[0]} \ - ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ ~{"--chrsz " + chrsz} \ ~{"--fraglen " + fraglen} \ ~{"--peak-type " + peak_type} \ @@ -3159,7 +3159,7 @@ task idr { ~{"--fraglen " + fraglen} \ ~{"--chrsz " + chrsz} \ ~{"--blacklist " + blacklist} \ - ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ ~{"--ta " + ta} >>> @@ -3219,7 +3219,7 @@ task overlap { ~{"--chrsz " + chrsz} \ ~{"--blacklist " + blacklist} \ --nonamecheck \ - ~{"--regex-bfilt-peak-chr-name \'" + regex_bfilt_peak_chr_name + "\'"} \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ ~{"--ta " + ta} >>> diff --git a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl index 491e12a03..565699428 100644 --- a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl +++ b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl @@ -16,7 +16,7 @@ task a_task { # Here is a comment between `meta` and the parenthesis. { # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" a_true: true a_false: false an_integer: 42 @@ -39,7 +39,7 @@ task a_task { # Here is a comment between `parameter_meta` and the parenthesis. 
{ # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" a_true: true a_false: false an_integer: 42 @@ -107,7 +107,7 @@ workflow hello { # Here is a comment between `meta` and the parenthesis. { # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" a_true: true a_false: false an_integer: 42 @@ -130,7 +130,7 @@ workflow hello { # Here is a comment between `parameter_meta` and the parenthesis. { # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" a_true: true a_false: false an_integer: 42 From 94aa6682855e644869a357eae4491940929b2f11 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Thu, 10 Oct 2024 16:22:44 -0400 Subject: [PATCH 18/60] feat: all elements in the tree are formatted --- wdl-ast/src/element.rs | 11 +- wdl-format/src/lib.rs | 47 +++-- wdl-format/src/v1.rs | 192 +++++++++++++++++- wdl-format/src/v1/expr.rs | 166 ++++++++++++++- wdl-format/src/v1/task.rs | 41 +++- wdl-format/src/v1/workflow.rs | 144 ++++++++++++- .../source.formatted.wdl | 40 ---- .../clays_complex_script/source.formatted.wdl | 4 +- .../source.formatted.wdl | 5 +- .../source.formatted.wdl | 83 ++++++++ .../not_covered_by_other_tests/source.wdl | 55 +++++ 11 files changed, 702 insertions(+), 86 deletions(-) create mode 100644 wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl create mode 100644 wdl-format/tests/format/not_covered_by_other_tests/source.wdl diff --git a/wdl-ast/src/element.rs b/wdl-ast/src/element.rs index 8dfb99a59..e578d4695 100644 --- a/wdl-ast/src/element.rs +++ b/wdl-ast/src/element.rs @@ -293,8 +293,14 @@ pub enum Node { VersionStatement(VersionStatement), /// A workflow definition. WorkflowDefinition(WorkflowDefinition), - /// A task item within a hints section. + /// An array within a workflow hints section. + WorkflowHintsArray(WorkflowHintsArray), + /// A hints item within a workflow hints section. WorkflowHintsItem(WorkflowHintsItem), + /// An object within a workflow hints section. + WorkflowHintsObject(WorkflowHintsObject), + /// An item within an object within a workflow hints section. + WorkflowHintsObjectItem(WorkflowHintsObjectItem), /// A hints section within a workflow. 
WorkflowHintsSection(WorkflowHintsSection), } @@ -386,7 +392,10 @@ ast_element_impl!( unbound_decl(): UnboundDeclNode => UnboundDecl => UnboundDecl, version_statement(): VersionStatementNode => VersionStatement => VersionStatement, workflow_definition(): WorkflowDefinitionNode => WorkflowDefinition => WorkflowDefinition, + workflow_hints_array(): WorkflowHintsArrayNode => WorkflowHintsArray => WorkflowHintsArray, workflow_hints_item(): WorkflowHintsItemNode => WorkflowHintsItem => WorkflowHintsItem, + workflow_hints_object(): WorkflowHintsObjectNode => WorkflowHintsObject => WorkflowHintsObject, + workflow_hints_object_item(): WorkflowHintsObjectItemNode => WorkflowHintsObjectItem => WorkflowHintsObjectItem, workflow_hints_section(): WorkflowHintsSectionNode => WorkflowHintsSection => WorkflowHintsSection ] ); diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index 3b70a4c08..fa71b7431 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -68,10 +68,12 @@ impl Writable for &FormatElement { AstNode::ConditionalStatement(_) => { v1::workflow::format_conditional_statement(self, stream) } - AstNode::DefaultOption(_) => todo!(), - AstNode::DivisionExpr(_) => todo!(), + AstNode::DefaultOption(_) => v1::expr::format_default_option(self, stream), + AstNode::DivisionExpr(_) => v1::expr::format_division_expr(self, stream), AstNode::EqualityExpr(_) => v1::expr::format_equality_expr(self, stream), - AstNode::ExponentiationExpr(_) => todo!(), + AstNode::ExponentiationExpr(_) => { + v1::expr::format_exponentiation_expr(self, stream) + } AstNode::GreaterEqualExpr(_) => v1::expr::format_greater_equal_expr(self, stream), AstNode::GreaterExpr(_) => v1::expr::format_greater_expr(self, stream), AstNode::IfExpr(_) => v1::expr::format_if_expr(self, stream), @@ -85,20 +87,20 @@ impl Writable for &FormatElement { AstNode::LiteralArray(_) => v1::expr::format_literal_array(self, stream), AstNode::LiteralBoolean(_) => v1::expr::format_literal_boolean(self, stream), AstNode::LiteralFloat(_) => v1::expr::format_literal_float(self, stream), - AstNode::LiteralHints(_) => todo!(), - AstNode::LiteralHintsItem(_) => todo!(), - AstNode::LiteralInput(_) => todo!(), - AstNode::LiteralInputItem(_) => todo!(), + AstNode::LiteralHints(_) => v1::format_literal_hints(self, stream), + AstNode::LiteralHintsItem(_) => v1::format_literal_hints_item(self, stream), + AstNode::LiteralInput(_) => v1::format_literal_input(self, stream), + AstNode::LiteralInputItem(_) => v1::format_literal_input_item(self, stream), AstNode::LiteralInteger(_) => v1::expr::format_literal_integer(self, stream), AstNode::LiteralMap(_) => v1::expr::format_literal_map(self, stream), AstNode::LiteralMapItem(_) => v1::expr::format_literal_map_item(self, stream), - AstNode::LiteralNone(_) => todo!(), + AstNode::LiteralNone(_) => v1::expr::format_literal_none(self, stream), AstNode::LiteralNull(_) => v1::format_literal_null(self, stream), AstNode::LiteralObject(_) => v1::expr::format_literal_object(self, stream), AstNode::LiteralObjectItem(_) => v1::expr::format_literal_object_item(self, stream), - AstNode::LiteralOutput(_) => todo!(), - AstNode::LiteralOutputItem(_) => todo!(), - AstNode::LiteralPair(_) => todo!(), + AstNode::LiteralOutput(_) => v1::format_literal_output(self, stream), + AstNode::LiteralOutputItem(_) => v1::format_literal_output_item(self, stream), + AstNode::LiteralPair(_) => v1::expr::format_literal_pair(self, stream), AstNode::LiteralString(_) => v1::expr::format_literal_string(self, stream), AstNode::LiteralStruct(_) => 
v1::r#struct::format_literal_struct(self, stream), AstNode::LiteralStructItem(_) => { @@ -112,7 +114,7 @@ impl Writable for &FormatElement { AstNode::MetadataObject(_) => v1::format_metadata_object(self, stream), AstNode::MetadataObjectItem(_) => v1::format_metadata_object_item(self, stream), AstNode::MetadataSection(_) => v1::format_metadata_section(self, stream), - AstNode::ModuloExpr(_) => todo!(), + AstNode::ModuloExpr(_) => v1::expr::format_modulo_expr(self, stream), AstNode::MultiplicationExpr(_) => { v1::expr::format_multiplication_expr(self, stream) } @@ -140,19 +142,32 @@ impl Writable for &FormatElement { AstNode::StructDefinition(_) => { v1::r#struct::format_struct_definition(self, stream) } - AstNode::SubtractionExpr(_) => todo!(), + AstNode::SubtractionExpr(_) => v1::expr::format_subtraction_expr(self, stream), AstNode::TaskDefinition(_) => v1::task::format_task_definition(self, stream), AstNode::TaskHintsItem(_) => v1::task::format_task_hints_item(self, stream), AstNode::TaskHintsSection(_) => v1::task::format_task_hints_section(self, stream), - AstNode::TrueFalseOption(_) => todo!(), + AstNode::TrueFalseOption(_) => v1::expr::format_true_false_option(self, stream), AstNode::TypeRef(_) => v1::format_type_ref(self, stream), AstNode::UnboundDecl(_) => v1::format_unbound_decl(self, stream), AstNode::VersionStatement(_) => v1::format_version_statement(self, stream), AstNode::WorkflowDefinition(_) => { v1::workflow::format_workflow_definition(self, stream) } - AstNode::WorkflowHintsItem(_) => todo!(), - AstNode::WorkflowHintsSection(_) => todo!(), + AstNode::WorkflowHintsArray(_) => { + v1::workflow::format_workflow_hints_array(self, stream) + } + AstNode::WorkflowHintsItem(_) => { + v1::workflow::format_workflow_hints_item(self, stream) + } + AstNode::WorkflowHintsObject(_) => { + v1::workflow::format_workflow_hints_object(self, stream) + } + AstNode::WorkflowHintsObjectItem(_) => { + v1::workflow::format_workflow_hints_object_item(self, stream) + } + AstNode::WorkflowHintsSection(_) => { + v1::workflow::format_workflow_hints_section(self, stream) + } }, Element::Token(token) => { stream.push_ast_token(token); diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 89ed3cf0c..0d5fb8649 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -99,20 +99,28 @@ pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream) { for child in element.children().expect("map type children") { (&child).write(stream); + if child.element().kind() == SyntaxKind::Comma { + stream.end_word(); + } } } /// Formats an [`ObjectType`](wdl_ast::v1::ObjectType). pub fn format_object_type(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("object type children") { - (&child).write(stream); - } + let mut children = element.children().expect("object type children"); + let object_keyword = children.next().expect("object type object keyword"); + assert!(object_keyword.element().kind() == SyntaxKind::ObjectTypeKeyword); + (&object_keyword).write(stream); + assert!(children.next().is_none()); } /// Formats a [`PairType`](wdl_ast::v1::PairType). pub fn format_pair_type(element: &FormatElement, stream: &mut TokenStream) { for child in element.children().expect("pair type children") { (&child).write(stream); + if child.element().kind() == SyntaxKind::Comma { + stream.end_word(); + } } } @@ -212,11 +220,10 @@ pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream
+/// Formats a [`LiteralInputItem`](wdl_ast::v1::LiteralInputItem).
+pub fn format_literal_input_item(
+    element: &FormatElement,
+    stream: &mut TokenStream<PreToken>,
) { + let mut children = element.children().expect("literal input item children"); + + let key = children.next().expect("literal input item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("literal input item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let hints_node = children.next().expect("literal input item hints node"); + assert!(hints_node.element().kind() == SyntaxKind::LiteralHintsNode); + (&hints_node).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralInput`](wdl_ast::v1::LiteralInput). +pub fn format_literal_input(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal input children"); + + let input_keyword = children.next().expect("literal input keyword"); + assert!(input_keyword.element().kind() == SyntaxKind::InputKeyword); + (&input_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("literal input open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } else { + assert!(child.element().kind() == SyntaxKind::LiteralInputItemNode); + } + (&child).write(stream); } stream.end_line(); } + +/// Formats a [`LiteralHintsItem`](wdl_ast::v1::LiteralHintsItem). +pub fn format_literal_hints_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal hints item children"); + + let key = children.next().expect("literal hints item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("literal hints item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("literal hints item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralHints`](wdl_ast::v1::LiteralHints). 
+pub fn format_literal_hints(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal hints children"); + + let hints_keyword = children.next().expect("literal hints keyword"); + assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword); + (&hints_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("literal hints open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut commas = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::LiteralHintsItemNode => items.push(child), + SyntaxKind::Comma => commas.push(child), + SyntaxKind::CloseBrace => close_brace = Some(child), + _ => panic!("unexpected literal hints child"), + } + } + + let mut commas = commas.iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (comma).write(stream); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("literal hints close brace")).write(stream); +} + +/// Formats a [`LiteralOutputItem`](wdl_ast::v1::LiteralOutputItem). +pub fn format_literal_output_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element + .children() + .expect("literal output item children") + .peekable(); + + for child in children.by_ref() { + if matches!(child.element().kind(), SyntaxKind::Ident | SyntaxKind::Dot) { + (&child).write(stream); + } else { + assert!(child.element().kind() == SyntaxKind::Colon); + (&child).write(stream); + stream.end_word(); + break; + } + } + + let value = children.next().expect("literal output item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralOutput`](wdl_ast::v1::LiteralOutput). 
+pub fn format_literal_output(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal output children");
+
+    let output_keyword = children.next().expect("literal output keyword");
+    assert!(output_keyword.element().kind() == SyntaxKind::OutputKeyword);
+    (&output_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("literal output open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::LiteralOutputItemNode => items.push(child),
+            SyntaxKind::Comma => commas.push(child),
+            SyntaxKind::CloseBrace => close_brace = Some(child),
+            _ => panic!("unexpected literal output child"),
+        }
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal output close brace")).write(stream);
+}
diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs
index e2a69beb6..3834e6764 100644
--- a/wdl-format/src/v1/expr.rs
+++ b/wdl-format/src/v1/expr.rs
@@ -27,6 +27,77 @@ pub fn format_sep_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+/// Formats a [`DefaultOption`](wdl_ast::v1::DefaultOption).
+pub fn format_default_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("default option children");
+
+    let default_keyword = children.next().expect("default keyword");
+    assert!(default_keyword.element().kind() == SyntaxKind::Ident);
+    (&default_keyword).write(stream);
+
+    let equals = children.next().expect("default equals");
+    assert!(equals.element().kind() == SyntaxKind::Assignment);
+    (&equals).write(stream);
+
+    let default_value = children.next().expect("default value");
+    (&default_value).write(stream);
+    stream.end_word();
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`TrueFalseOption`](wdl_ast::v1::TrueFalseOption).
+pub fn format_true_false_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("true false option children");
+
+    let first_keyword = children.next().expect("true false option first keyword");
+    let first_keyword_kind = first_keyword.element().kind();
+    assert!(
+        first_keyword_kind == SyntaxKind::TrueKeyword
+            || first_keyword_kind == SyntaxKind::FalseKeyword
+    );
+
+    let first_equals = children.next().expect("true false option first equals");
+    assert!(first_equals.element().kind() == SyntaxKind::Assignment);
+
+    let first_value = children.next().expect("true false option first value");
+
+    let second_keyword = children.next().expect("true false option second keyword");
+    let second_keyword_kind = second_keyword.element().kind();
+    assert!(
+        second_keyword_kind == SyntaxKind::TrueKeyword
+            || second_keyword_kind == SyntaxKind::FalseKeyword
+    );
+
+    let second_equals = children.next().expect("true false option second equals");
+    assert!(second_equals.element().kind() == SyntaxKind::Assignment);
+
+    let second_value = children.next().expect("true false option second value");
+
+    if first_keyword_kind == SyntaxKind::TrueKeyword {
+        assert!(second_keyword_kind == SyntaxKind::FalseKeyword);
+        (&first_keyword).write(stream);
+        (&first_equals).write(stream);
+        (&first_value).write(stream);
+        stream.end_word();
+        (&second_keyword).write(stream);
+        (&second_equals).write(stream);
+        (&second_value).write(stream);
+    } else {
+        assert!(second_keyword_kind == SyntaxKind::TrueKeyword);
+        (&second_keyword).write(stream);
+        (&second_equals).write(stream);
+        (&second_value).write(stream);
+        stream.end_word();
+        (&first_keyword).write(stream);
+        (&first_equals).write(stream);
+        (&first_value).write(stream);
+    }
+    stream.end_word();
+
+    assert!(children.next().is_none());
+}
+
 /// Formats a [`Placeholder`](wdl_ast::v1::Placeholder).
 pub fn format_placeholder(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
     let mut children = element.children().expect("placeholder children");
@@ -109,12 +180,49 @@ pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream<
             {
-                unreachable!("unexpected child in literal string: {:?}", child.element().kind());
+                unreachable!(
+                    "unexpected child in literal string: {:?}",
+                    child.element().kind()
+                );
             }
         }
     }
 }
 
+/// Formats a [`LiteralNone`](wdl_ast::v1::LiteralNone).
+pub fn format_literal_none(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal none children");
+    let none = children.next().expect("literal none token");
+    assert!(none.element().kind() == SyntaxKind::NoneKeyword);
+    (&none).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralPair`](wdl_ast::v1::LiteralPair).
+pub fn format_literal_pair(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal pair children");
+
+    let open_paren = children.next().expect("literal pair open paren");
+    assert!(open_paren.element().kind() == SyntaxKind::OpenParen);
+    (&open_paren).write(stream);
+
+    let left = children.next().expect("literal pair left");
+    (&left).write(stream);
+
+    let comma = children.next().expect("literal pair comma");
+    assert!(comma.element().kind() == SyntaxKind::Comma);
+    (&comma).write(stream);
+    stream.end_word();
+
+    let right = children.next().expect("literal pair right");
+    (&right).write(stream);
+
+    let close_paren = children.next().expect("literal pair close paren");
+    assert!(close_paren.element().kind() == SyntaxKind::CloseParen);
+    (&close_paren).write(stream);
+    assert!(children.next().is_none());
+}
+
 /// Formats a [`LiteralBoolean`](wdl_ast::v1::LiteralBoolean).
 pub fn format_literal_boolean(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
     let mut children = element.children().expect("literal boolean children");
@@ -353,6 +461,20 @@ pub fn format_addition_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+/// Formats a [`SubtractionExpr`](wdl_ast::v1::SubtractionExpr).
+pub fn format_subtraction_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("subtraction expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Minus;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
 /// Formats a [`MultiplicationExpr`](wdl_ast::v1::MultiplicationExpr).
 pub fn format_multiplication_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
     for child in element.children().expect("multiplication expr children") {
@@ -367,6 +489,48 @@ pub fn format_multiplication_expr(element: &FormatElement, stream: &mut TokenStr
     }
 }
 
+/// Formats a [`DivisionExpr`](wdl_ast::v1::DivisionExpr).
+pub fn format_division_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("division expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Slash;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`ModuloExpr`](wdl_ast::v1::ModuloExpr).
+pub fn format_modulo_expr(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("modulo expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Percent;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`ExponentiationExpr`](wdl_ast::v1::ExponentiationExpr).
+pub fn format_exponentiation_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("exponentiation expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Exponentiation; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + /// Formats a [`LogicalAndExpr`](wdl_ast::v1::LogicalAndExpr). pub fn format_logical_and_expr(element: &FormatElement, stream: &mut TokenStream) { for child in element.children().expect("logical and expr children") { diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index 4dc4797df..db5fb409d 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -3,8 +3,8 @@ use wdl_ast::SyntaxKind; use crate::PreToken; -use crate::Trivia; use crate::TokenStream; +use crate::Trivia; use crate::Writable as _; use crate::element::FormatElement; @@ -145,27 +145,44 @@ pub fn format_command_section(element: &FormatElement, stream: &mut TokenStream< let open_delimiter = children.next().expect("open delimiter"); match open_delimiter.element().kind() { SyntaxKind::OpenBrace => { - stream.push_literal_in_place_of_token(open_delimiter.element().as_token().expect("open brace should be token"), "<<<".to_string()); - }, + stream.push_literal_in_place_of_token( + open_delimiter + .element() + .as_token() + .expect("open brace should be token"), + "<<<".to_string(), + ); + } SyntaxKind::OpenHeredoc => { (&open_delimiter).write(stream); - }, + } _ => { - unreachable!("unexpected open delimiter in command section: {:?}", open_delimiter.element().kind()); + unreachable!( + "unexpected open delimiter in command section: {:?}", + open_delimiter.element().kind() + ); } } - stream.increment_indent(); - + // Technically there's no trivia inside the command section, + // so we don't want to increment indent here. + // All the indentation should be handled by the command text itself. 
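+    // Concretely: each child below is written verbatim, except that a closing
+    // `}` is swapped for `>>>`, so both command syntaxes normalize to the
+    // heredoc style on output.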
for child in children { let kind = child.element().kind(); if kind == SyntaxKind::CloseBrace { - stream.decrement_indent(); - stream.push_literal_in_place_of_token(child.element().as_token().expect("close brace should be token"), ">>>".to_string()); + stream.push_literal_in_place_of_token( + child + .element() + .as_token() + .expect("close brace should be token"), + ">>>".to_string(), + ); } else if kind == SyntaxKind::CloseHeredoc { - stream.decrement_indent(); (&child).write(stream); } else { - assert!(matches!(kind, SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode)); + assert!(matches!( + kind, + SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode + )); (&child).write(stream); } } @@ -227,6 +244,7 @@ pub fn format_requirements_section(element: &FormatElement, stream: &mut TokenSt for item in items { (&item).write(stream); + stream.end_line(); } stream.decrement_indent(); @@ -352,6 +370,7 @@ pub fn format_task_hints_section(element: &FormatElement, stream: &mut TokenStre for item in items { (&item).write(stream); + stream.end_line(); } stream.decrement_indent(); diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index 4b4e50148..87034a509 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -115,6 +115,7 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr let mut input = None; let mut body = Vec::new(); let mut output = None; + let mut hints = None; let mut close_brace = None; for child in children { @@ -143,6 +144,9 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr SyntaxKind::OutputSectionNode => { output = Some(child.clone()); } + SyntaxKind::WorkflowHintsSectionNode => { + hints = Some(child.clone()); + } SyntaxKind::CloseBrace => { close_brace = Some(child.clone()); } @@ -170,22 +174,154 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr stream.blank_line(); } - let need_blank = !body.is_empty(); for child in body { (&child).write(stream); } - if need_blank { - stream.blank_line(); - } + stream.blank_line(); if let Some(output) = output { (&output).write(stream); stream.blank_line(); } + if let Some(hints) = hints { + (&hints).write(stream); + stream.blank_line(); + } + stream.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine))); stream.decrement_indent(); (&close_brace.expect("workflow close brace")).write(stream); stream.end_line(); } + +/// Formats a [`WorkflowHintsArray`](wdl_ast::v1::WorkflowHintsArray). 
+pub fn format_workflow_hints_array(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints array children"); + + let open_bracket = children.next().expect("open bracket"); + assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket); + (&open_bracket).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut commas = Vec::new(); + let mut close_bracket = None; + + for child in children { + match child.element().kind() { + SyntaxKind::Comma => { + commas.push(child.clone()); + } + SyntaxKind::CloseBracket => { + close_bracket = Some(child.clone()); + } + _ => { + items.push(child.clone()); + } + } + } + + let mut commas = commas.into_iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (&comma).write(stream); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + stream.end_line(); + } + + stream.decrement_indent(); + (&close_bracket.expect("workflow hints array close bracket")).write(stream); +} + +/// Formats a [`WorkflowHintsItem`](wdl_ast::v1::WorkflowHintsItem). +pub fn format_workflow_hints_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints item children"); + + let key = children.next().expect("workflow hints item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("workflow hints item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("workflow hints item value"); + (&value).write(stream); + + stream.end_line(); + + assert!(children.next().is_none()); +} + +/// Formats a [`WorkflowHintsObjectItem`](wdl_ast::v1::WorkflowHintsObjectItem). +pub fn format_workflow_hints_object_item( + element: &FormatElement, + stream: &mut TokenStream, +) { + let mut children = element + .children() + .expect("workflow hints object item children"); + + let key = children.next().expect("workflow hints object item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("workflow hints object item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("workflow hints object item value"); + (&value).write(stream); + + stream.end_line(); + + assert!(children.next().is_none()); +} + +/// Formats a [`WorkflowHintsObject`](wdl_ast::v1::WorkflowHintsObject). +pub fn format_workflow_hints_object(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints object children"); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } + (&child).write(stream); + stream.end_line(); + } +} + +/// Formats a [`WorkflowHintsSection`](wdl_ast::v1::WorkflowHintsSection). 
+pub fn format_workflow_hints_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints section children"); + + let hints_keyword = children.next().expect("hints keyword"); + assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword); + (&hints_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } + (&child).write(stream); + stream.end_line(); + } +} diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl index adb1a3064..587c6e9c0 100644 --- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -2438,7 +2438,6 @@ task align { ]) command <<< - set -e # check if pipeline dependencies can be found @@ -2522,7 +2521,6 @@ task align { ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} rm -rf R1 R2 R1$SUFFIX R2$SUFFIX - >>> output { @@ -2575,7 +2573,6 @@ task filter { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_filter.py) \ ~{bam} \ @@ -2597,7 +2594,6 @@ task filter { ~{"--ref-fa " + ref_fa} \ '--delete-original-bam' fi - >>> output { @@ -2641,7 +2637,6 @@ task bam2ta { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_bam2ta.py) \ ~{bam} \ @@ -2651,7 +2646,6 @@ task bam2ta { ~{"--subsample " + subsample} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} - >>> output { @@ -2687,13 +2681,11 @@ task spr { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_spr.py) \ ~{ta} \ ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ ~{if paired_end then "--paired-end" else ""} - >>> output { @@ -2723,13 +2715,11 @@ task pool_ta { } command <<< - set -e python3 $(which encode_task_pool_ta.py) \ ~{sep=" " select_all(tas)} \ ~{"--prefix " + prefix} \ ~{"--col " + col} - >>> output { @@ -2773,7 +2763,6 @@ task xcor { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_xcor.py) \ ~{ta} \ @@ -2785,7 +2774,6 @@ task xcor { ~{"--exclusion-range-max " + exclusion_range_max} \ ~{"--subsample " + subsample} \ ~{"--nth " + cpu} - >>> output { @@ -2828,7 +2816,6 @@ task jsd { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_jsd.py) \ ~{sep=" " select_all(nodup_bams)} \ @@ -2836,7 +2823,6 @@ task jsd { ~{"--mapq-thresh " + mapq_thresh} \ ~{"--blacklist " + blacklist} \ ~{"--nth " + cpu} - >>> output { @@ -2872,7 +2858,6 @@ task choose_ctl { } command <<< - set -e python3 $(which encode_task_choose_ctl.py) \ --tas ~{sep=" " select_all(tas)} \ @@ -2883,7 +2868,6 @@ task choose_ctl { ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ ~{"--ctl-depth-limit " + ctl_depth_limit} \ ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} - >>> output { @@ -2918,13 +2902,11 @@ task count_signal_track { Float mem_gb = 8.0 command <<< - set -e python3 $(which encode_task_count_signal_track.py) \ ~{ta} \ ~{"--chrsz " + chrsz} \ ~{"--mem-gb " 
+ mem_gb} - >>> output { @@ -2961,12 +2943,10 @@ task subsample_ctl { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - python3 $(which encode_task_subsample_ctl.py) \ ~{ta} \ ~{"--subsample " + subsample} \ ~{if paired_end then "--paired-end" else ""} \ - >>> output { @@ -3014,7 +2994,6 @@ task call_peak { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e if [ '~{peak_caller}' == 'macs2' ]; then @@ -3045,7 +3024,6 @@ task call_peak { ~{"--fraglen " + fraglen} \ ~{"--peak-type " + peak_type} \ ~{"--blacklist " + blacklist} - >>> output { @@ -3096,7 +3074,6 @@ task macs2_signal_track { Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) command <<< - set -e python3 $(which encode_task_macs2_signal_track_chip.py) \ ~{sep=" " select_all(tas)} \ @@ -3105,7 +3082,6 @@ task macs2_signal_track { ~{"--fraglen " + fraglen} \ ~{"--pval-thresh " + pval_thresh} \ ~{"--mem-gb " + mem_gb} - >>> output { @@ -3146,7 +3122,6 @@ task idr { } command <<< - set -e ~{if defined(ta) then "" else "touch null.frip.qc"} touch null @@ -3161,7 +3136,6 @@ task idr { ~{"--blacklist " + blacklist} \ ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ ~{"--ta " + ta} - >>> output { @@ -3207,7 +3181,6 @@ task overlap { } command <<< - set -e ~{if defined(ta) then "" else "touch null.frip.qc"} touch null @@ -3221,7 +3194,6 @@ task overlap { --nonamecheck \ ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ ~{"--ta " + ta} - >>> output { @@ -3262,7 +3234,6 @@ task reproducibility { } command <<< - set -e python3 $(which encode_task_reproducibility.py) \ ~{sep=" " peaks} \ @@ -3271,7 +3242,6 @@ task reproducibility { --prefix ~{prefix} \ ~{"--peak-type " + peak_type} \ ~{"--chrsz " + chrsz} - >>> output { @@ -3320,13 +3290,11 @@ task gc_bias { Float picard_java_heap_factor = 0.9 command <<< - set -e python3 $(which encode_task_gc_bias.py) \ ~{"--nodup-bam " + nodup_bam} \ ~{"--ref-fa " + ref_fa} \ ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G")} - >>> output { @@ -3417,7 +3385,6 @@ task qc_report { } command <<< - set -e python3 $(which encode_task_qc_report.py) \ --pipeline-prefix chip \ @@ -3479,7 +3446,6 @@ task qc_report { --out-qc-html qc.html \ --out-qc-json qc.json \ ~{"--qc-json-ref " + qc_json_ref} - >>> output { @@ -3510,7 +3476,6 @@ task read_genome_tsv { } command <<< - echo "$(basename ~{genome_tsv})" > genome_name # create empty files for all entries touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 @@ -3527,7 +3492,6 @@ task read_genome_tsv { with open(key,'w') as fp2: fp2.write(val) CODE - >>> output { @@ -3564,7 +3528,6 @@ task rounded_mean { } command <<< - python <>> output { @@ -3602,10 +3564,8 @@ task raise_exception { } command <<< - echo -e "\n* Error: ~{msg}\n" >&2 exit 2 - >>> output { diff --git a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl index 565699428..147620ed0 100644 --- a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl +++ b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl @@ -70,9 +70,7 @@ task a_task { # the TextMate language that it's a custom struct. } - command <<< - - >>> + command <<< >>> output # Here is a comment before the output. 
diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl index 705f360ba..8a498a640 100644 --- a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -92,7 +92,7 @@ workflow test_wf { String a = "friend" Int b = 1 + 2 String c = "Hello, ~{a}" - Map[String,Int] d = { + Map[String, Int] d = { "a": 0, "b": 1, "c": 2, @@ -113,8 +113,7 @@ task test_task { String bowchicka } - command <<< - >>> + command <<<>>> } struct SpliceJunctionMotifs { diff --git a/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl b/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl new file mode 100644 index 000000000..a181a9fda --- /dev/null +++ b/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl @@ -0,0 +1,83 @@ +## This is a WDL file with Nodes not covered by other tests +version 1.2 + +task test1 { + meta { + } + + parameter_meta { + } + + input { + String? name = None + Float exponent = 2.7 ** 3 + } + + Pair[String, Float] literal = ("hello", 3.14 - 6.8) + + Boolean flag = true + Int modulo = 42 % 7 + + command # my command block + <<< + echo 'hello ~{default="world" name}' + echo '~{true="good" false="bad" flag}bye' + >>> + + output { + Int math = 42 / 7 + } + + hints { + inputs: input { + a: hints { + foo: "bar", + } + } + f: [ + 1, + 2, + 3, + ] + g: { + foo: "bar", + } + outputs: output { + foo: hints { + a: "a", + b: "b", + c: "c", + }, + baz.bar.qux: hints { + foo: "foo", + bar: "bar", + baz: "baz", + }, + } + } +} + +workflow test2 { + Pair[String, Float] literal = ("hello", 3.14 - 6.8) + + output { + Int math = 42 / 7 + } + + hints { + allow_nested_inputs: true + a: true + b: 1 + c: 1.0 + d: -1 + e: "foo" + f: [ + 1, + 2, + 3, + ] + g: { + foo: "bar" + } + } +} diff --git a/wdl-format/tests/format/not_covered_by_other_tests/source.wdl b/wdl-format/tests/format/not_covered_by_other_tests/source.wdl new file mode 100644 index 000000000..41aacd302 --- /dev/null +++ b/wdl-format/tests/format/not_covered_by_other_tests/source.wdl @@ -0,0 +1,55 @@ +## This is a WDL file with Nodes not covered by other tests +version 1.2 +task test1 { + parameter_meta {} + output {Int math = 42 / 7} + hints {inputs: input { + a: hints { + foo: "bar" + } + } + f: [1, 2, 3] + g: { foo: "bar" } + outputs: output { + foo: hints { + a: "a", + b: "b", + c: "c", + }, + baz.bar.qux: hints { + foo: "foo", + bar: "bar", + baz: "baz", + }, + }} + command # my command block + { + echo 'hello ${default='world' name}' + echo '~{false="bad" true='good' flag}bye' + } + Pair[String, Float] literal = ("hello",3.14-6.8) + + + + Boolean flag = true + Int modulo = 42 % 7 + input { + String? 
name = None + Float exponent = 2.7**3 } meta {} } +workflow test2 { + output {Int math = 42 / 7} + hints { + allow_nested_inputs: true + a: true + b: 1 + c: 1.0 + d: -1 + e: "foo" + f: [1, 2, 3] + g: { foo: "bar" } + } + Pair[String, Float] literal = ("hello",3.14-6.8) +}
\ No newline at end of file

From fd4b14543166b0ebf169f94e6a97c67ecf04021d Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Thu, 10 Oct 2024 17:40:02 -0400
Subject: [PATCH 19/60] fix: don't line split empty arrays

---
 wdl-format/src/v1/expr.rs | 12 +-
 .../source.formatted.wdl | 198 ++++++------------
 .../source.formatted.wdl | 3 +-
 3 files changed, 75 insertions(+), 138 deletions(-)

diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs
index 3834e6764..5ea2e7a5d 100644
--- a/wdl-format/src/v1/expr.rs
+++ b/wdl-format/src/v1/expr.rs
@@ -272,7 +272,6 @@ pub fn format_literal_array(element: &FormatElement, stream: &mut TokenStream<
         .collect::<Vec<_>>();
+    let empty = items.is_empty();
+    if !empty {
+        stream.increment_indent();
+    }
     let mut commas = commas.iter();
     for item in items {
         (&item).write(stream);
         if let Some(comma) = commas.next() {
             (comma).write(stream);
-            stream.end_line();
         } else {
             stream.push_literal(",".to_string(), SyntaxKind::Comma);
-            stream.end_line();
         }
+        stream.end_line();
     }
-    stream.decrement_indent();
+    if !empty {
+        stream.decrement_indent();
+    }
     (&close_bracket.expect("literal array close bracket")).write(stream);
 }
diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
index 587c6e9c0..08c6d6220 100644
--- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
+++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
@@ -889,113 +889,62 @@ workflow chip {
     # group: input_genomic_data
     Boolean?
paired_end - Array[Boolean] paired_ends = [ - ] - Array[File] fastqs_rep1_R1 = [ - ] - Array[File] fastqs_rep1_R2 = [ - ] - Array[File] fastqs_rep2_R1 = [ - ] - Array[File] fastqs_rep2_R2 = [ - ] - Array[File] fastqs_rep3_R1 = [ - ] - Array[File] fastqs_rep3_R2 = [ - ] - Array[File] fastqs_rep4_R1 = [ - ] - Array[File] fastqs_rep4_R2 = [ - ] - Array[File] fastqs_rep5_R1 = [ - ] - Array[File] fastqs_rep5_R2 = [ - ] - Array[File] fastqs_rep6_R1 = [ - ] - Array[File] fastqs_rep6_R2 = [ - ] - Array[File] fastqs_rep7_R1 = [ - ] - Array[File] fastqs_rep7_R2 = [ - ] - Array[File] fastqs_rep8_R1 = [ - ] - Array[File] fastqs_rep8_R2 = [ - ] - Array[File] fastqs_rep9_R1 = [ - ] - Array[File] fastqs_rep9_R2 = [ - ] - Array[File] fastqs_rep10_R1 = [ - ] - Array[File] fastqs_rep10_R2 = [ - ] - Array[File] bams = [ - ] - Array[File] nodup_bams = [ - ] - Array[File] tas = [ - ] - Array[File] peaks = [ - ] - Array[File] peaks_pr1 = [ - ] - Array[File] peaks_pr2 = [ - ] + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] File? peak_ppr1 File? peak_ppr2 File? peak_pooled Boolean? 
ctl_paired_end - Array[Boolean] ctl_paired_ends = [ - ] - Array[File] ctl_fastqs_rep1_R1 = [ - ] - Array[File] ctl_fastqs_rep1_R2 = [ - ] - Array[File] ctl_fastqs_rep2_R1 = [ - ] - Array[File] ctl_fastqs_rep2_R2 = [ - ] - Array[File] ctl_fastqs_rep3_R1 = [ - ] - Array[File] ctl_fastqs_rep3_R2 = [ - ] - Array[File] ctl_fastqs_rep4_R1 = [ - ] - Array[File] ctl_fastqs_rep4_R2 = [ - ] - Array[File] ctl_fastqs_rep5_R1 = [ - ] - Array[File] ctl_fastqs_rep5_R2 = [ - ] - Array[File] ctl_fastqs_rep6_R1 = [ - ] - Array[File] ctl_fastqs_rep6_R2 = [ - ] - Array[File] ctl_fastqs_rep7_R1 = [ - ] - Array[File] ctl_fastqs_rep7_R2 = [ - ] - Array[File] ctl_fastqs_rep8_R1 = [ - ] - Array[File] ctl_fastqs_rep8_R2 = [ - ] - Array[File] ctl_fastqs_rep9_R1 = [ - ] - Array[File] ctl_fastqs_rep9_R2 = [ - ] - Array[File] ctl_fastqs_rep10_R1 = [ - ] - Array[File] ctl_fastqs_rep10_R2 = [ - ] - Array[File] ctl_bams = [ - ] - Array[File] ctl_nodup_bams = [ - ] - Array[File] ctl_tas = [ - ] + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] # group: pipeline_parameter String pipeline_type @@ -1020,8 +969,7 @@ workflow chip { String dup_marker = "picard" Boolean no_dup_removal = false Int mapq_thresh = 30 - Array[String] filter_chrs = [ - ] + Array[String] filter_chrs = [] Int subsample_reads = 0 Int ctl_subsample_reads = 0 Int xcor_subsample_reads = 15000000 @@ -1032,8 +980,7 @@ workflow chip { # group: peak_calling Int ctl_depth_limit = 200000000 Float exp_ctl_depth_ratio_limit = 5.0 - Array[Int?] fraglen = [ - ] + Array[Int?] fraglen = [] String? 
peak_caller Boolean always_use_pooled_ctl = true Float ctl_depth_ratio = 1.2 @@ -1263,8 +1210,7 @@ workflow chip { fastqs_rep2_R1, ] else if length(fastqs_rep1_R1) > 0 then [ fastqs_rep1_R1, - ] else [ - ] + ] else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] fastqs_R2 = [ fastqs_rep1_R2, @@ -1345,8 +1291,7 @@ workflow chip { ctl_fastqs_rep2_R1, ] else if length(ctl_fastqs_rep1_R1) > 0 then [ ctl_fastqs_rep1_R1, - ] else [ - ] + ] else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] ctl_fastqs_R2 = [ ctl_fastqs_rep1_R2, @@ -1447,8 +1392,7 @@ workflow chip { if (has_input_of_align && !has_output_of_align) { call align { input: fastqs_R1 = fastqs_R1[i], - fastqs_R2 = if paired_end_ then fastqs_R2[i] else [ - ], + fastqs_R2 = if paired_end_ then fastqs_R2[i] else [], crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1552,8 +1496,7 @@ workflow chip { if (has_input_of_align) { call align as align_R1 { input: fastqs_R1 = fastqs_R1[i], - fastqs_R2 = [ - ], + fastqs_R2 = [], trim_bp = xcor_trim_bp, crop_length = 0, crop_length_tol = 0, @@ -1687,8 +1630,7 @@ workflow chip { if (has_input_of_align_ctl && !has_output_of_align_ctl) { call align as align_ctl { input: fastqs_R1 = ctl_fastqs_R1[i], - fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [ - ], + fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [], crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1871,8 +1813,7 @@ workflow chip { runtime_environment = runtime_environment, } } - Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [ - ] else if chosen_ctl_ta_subsample > 0 then [ + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] else if chosen_ctl_ta_subsample > 0 then [ select_first([ subsample_ctl.ta_subsampled, ]), @@ -2030,8 +1971,7 @@ workflow chip { } } # actually not an array - Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [ - ] else if chosen_ctl_ta_pooled_subsample > 0 then [ + Array[File?] 
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled, ] else if num_ctl < 2 then [ ctl_ta_[0], @@ -2281,8 +2221,7 @@ workflow chip { peaks = select_all(overlap.bfilt_overlap_peak), peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([ overlap_pr.bfilt_overlap_peak, - ]) else [ - ], + ]) else [], peak_ppr = overlap_ppr.bfilt_overlap_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2297,8 +2236,7 @@ workflow chip { peaks = select_all(idr.bfilt_idr_peak), peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([ idr_pr.bfilt_idr_peak, - ]) else [ - ], + ]) else [], peak_ppr = idr_ppr.bfilt_idr_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2339,8 +2277,7 @@ workflow chip { jsd_plot = jsd.plot, jsd_qcs = if defined(jsd.jsd_qcs) then select_first([ jsd.jsd_qcs, - ]) else [ - ], + ]) else [], frip_qcs = select_all(call_peak.frip_qc), frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), @@ -2352,20 +2289,17 @@ workflow chip { idr_plots = select_all(idr.idr_plot), idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([ idr_pr.idr_plot, - ]) else [ - ], + ]) else [], idr_plot_ppr = idr_ppr.idr_plot, frip_idr_qcs = select_all(idr.frip_qc), frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([ idr_pr.frip_qc, - ]) else [ - ], + ]) else [], frip_idr_qc_ppr = idr_ppr.frip_qc, frip_overlap_qcs = select_all(overlap.frip_qc), frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([ overlap_pr.frip_qc, - ]) else [ - ], + ]) else [], frip_overlap_qc_ppr = overlap_ppr.frip_qc, idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl index 8a498a640..dc7e44561 100644 --- a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -75,8 +75,7 @@ workflow test_wf { } call aliased as x call aliased as x { input: } - call f after x after y call f after x after y { input: a = [ - ] } + call f after x after y call f after x after y { input: a = [] } call f as x after x call f as x after x after y { input: name = "hello" } call test_task as foo { input: bowchicka = "wowwow" } if (true) { From 1e66dd042bc36bd2a4825e703861fcbd37a24fef Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Thu, 10 Oct 2024 19:05:11 -0400 Subject: [PATCH 20/60] revise: reorganize format functions --- wdl-format/src/lib.rs | 30 ++--- wdl-format/src/v1.rs | 258 +------------------------------------- wdl-format/src/v1/decl.rs | 77 ++++++++++++ wdl-format/src/v1/meta.rs | 195 ++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 270 deletions(-) create mode 100644 wdl-format/src/v1/decl.rs create mode 100644 wdl-format/src/v1/meta.rs diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index fa71b7431..faeea5672 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -51,9 +51,9 @@ impl Writable for &FormatElement { Element::Node(node) => match node { AstNode::AccessExpr(_) => v1::expr::format_access_expr(self, stream), AstNode::AdditionExpr(_) => v1::expr::format_addition_expr(self, stream), - AstNode::ArrayType(_) => v1::format_array_type(self, stream), + AstNode::ArrayType(_) => v1::decl::format_array_type(self, stream), 
AstNode::Ast(_) => v1::format_ast(self, stream), - AstNode::BoundDecl(_) => v1::format_bound_decl(self, stream), + AstNode::BoundDecl(_) => v1::decl::format_bound_decl(self, stream), AstNode::CallAfter(_) => v1::workflow::call::format_call_after(self, stream), AstNode::CallAlias(_) => v1::workflow::call::format_call_alias(self, stream), AstNode::CallExpr(_) => v1::expr::format_call_expr(self, stream), @@ -95,7 +95,7 @@ impl Writable for &FormatElement { AstNode::LiteralMap(_) => v1::expr::format_literal_map(self, stream), AstNode::LiteralMapItem(_) => v1::expr::format_literal_map_item(self, stream), AstNode::LiteralNone(_) => v1::expr::format_literal_none(self, stream), - AstNode::LiteralNull(_) => v1::format_literal_null(self, stream), + AstNode::LiteralNull(_) => v1::meta::format_literal_null(self, stream), AstNode::LiteralObject(_) => v1::expr::format_literal_object(self, stream), AstNode::LiteralObjectItem(_) => v1::expr::format_literal_object_item(self, stream), AstNode::LiteralOutput(_) => v1::format_literal_output(self, stream), @@ -109,11 +109,13 @@ impl Writable for &FormatElement { AstNode::LogicalAndExpr(_) => v1::expr::format_logical_and_expr(self, stream), AstNode::LogicalNotExpr(_) => v1::expr::format_logical_not_expr(self, stream), AstNode::LogicalOrExpr(_) => v1::expr::format_logical_or_expr(self, stream), - AstNode::MapType(_) => v1::format_map_type(self, stream), - AstNode::MetadataArray(_) => v1::format_metadata_array(self, stream), - AstNode::MetadataObject(_) => v1::format_metadata_object(self, stream), - AstNode::MetadataObjectItem(_) => v1::format_metadata_object_item(self, stream), - AstNode::MetadataSection(_) => v1::format_metadata_section(self, stream), + AstNode::MapType(_) => v1::decl::format_map_type(self, stream), + AstNode::MetadataArray(_) => v1::meta::format_metadata_array(self, stream), + AstNode::MetadataObject(_) => v1::meta::format_metadata_object(self, stream), + AstNode::MetadataObjectItem(_) => { + v1::meta::format_metadata_object_item(self, stream) + } + AstNode::MetadataSection(_) => v1::meta::format_metadata_section(self, stream), AstNode::ModuloExpr(_) => v1::expr::format_modulo_expr(self, stream), AstNode::MultiplicationExpr(_) => { v1::expr::format_multiplication_expr(self, stream) @@ -121,14 +123,14 @@ impl Writable for &FormatElement { AstNode::NameRef(_) => v1::expr::format_name_ref(self, stream), AstNode::NegationExpr(_) => v1::expr::format_negation_expr(self, stream), AstNode::OutputSection(_) => v1::format_output_section(self, stream), - AstNode::PairType(_) => v1::format_pair_type(self, stream), - AstNode::ObjectType(_) => v1::format_object_type(self, stream), + AstNode::PairType(_) => v1::decl::format_pair_type(self, stream), + AstNode::ObjectType(_) => v1::decl::format_object_type(self, stream), AstNode::ParameterMetadataSection(_) => { - v1::format_parameter_metadata_section(self, stream) + v1::meta::format_parameter_metadata_section(self, stream) } AstNode::ParenthesizedExpr(_) => v1::expr::format_parenthesized_expr(self, stream), AstNode::Placeholder(_) => v1::expr::format_placeholder(self, stream), - AstNode::PrimitiveType(_) => v1::format_primitive_type(self, stream), + AstNode::PrimitiveType(_) => v1::decl::format_primitive_type(self, stream), AstNode::RequirementsItem(_) => v1::task::format_requirements_item(self, stream), AstNode::RequirementsSection(_) => { v1::task::format_requirements_section(self, stream) @@ -147,8 +149,8 @@ impl Writable for &FormatElement { AstNode::TaskHintsItem(_) => 
v1::task::format_task_hints_item(self, stream), AstNode::TaskHintsSection(_) => v1::task::format_task_hints_section(self, stream), AstNode::TrueFalseOption(_) => v1::expr::format_true_false_option(self, stream), - AstNode::TypeRef(_) => v1::format_type_ref(self, stream), - AstNode::UnboundDecl(_) => v1::format_unbound_decl(self, stream), + AstNode::TypeRef(_) => v1::decl::format_type_ref(self, stream), + AstNode::UnboundDecl(_) => v1::decl::format_unbound_decl(self, stream), AstNode::VersionStatement(_) => v1::format_version_statement(self, stream), AstNode::WorkflowDefinition(_) => { v1::workflow::format_workflow_definition(self, stream) diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 0d5fb8649..859f46682 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -3,8 +3,10 @@ use wdl_ast::AstToken; use wdl_ast::SyntaxKind; +pub mod decl; pub mod expr; pub mod import; +pub mod meta; pub mod r#struct; pub mod task; pub mod workflow; @@ -73,83 +75,6 @@ pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStrea stream.end_line(); } -/// Formats a [`LiteralNull`](wdl_ast::v1::LiteralNull). -pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("literal null children"); - let null = children.next().expect("literal null token"); - (&null).write(stream); - assert!(children.next().is_none()); -} - -/// Formats a [`PrimitiveType`](wdl_ast::v1::PrimitiveType). -pub fn format_primitive_type(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("primitive type children") { - (&child).write(stream); - } -} - -/// Formats an [`ArrayType`](wdl_ast::v1::ArrayType). -pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("array type children") { - (&child).write(stream); - } -} - -/// Formats a [`MapType`](wdl_ast::v1::MapType). -pub fn format_map_type(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("map type children") { - (&child).write(stream); - if child.element().kind() == SyntaxKind::Comma { - stream.end_word(); - } - } -} - -/// Formats an [`ObjectType`](wdl_ast::v1::ObjectType). -pub fn format_object_type(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("object type children"); - let object_keyword = children.next().expect("object type object keyword"); - assert!(object_keyword.element().kind() == SyntaxKind::ObjectTypeKeyword); - (&object_keyword).write(stream); - assert!(children.next().is_none()); -} - -/// Formats a [`PairType`](wdl_ast::v1::PairType). -pub fn format_pair_type(element: &FormatElement, stream: &mut TokenStream) { - for child in element.children().expect("pair type children") { - (&child).write(stream); - if child.element().kind() == SyntaxKind::Comma { - stream.end_word(); - } - } -} - -/// Formats a [`TypeRef`](wdl_ast::v1::TypeRef). -pub fn format_type_ref(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("type ref children"); - let t = children.next().expect("type ref type"); - (&t).write(stream); - assert!(children.next().is_none()); -} - -/// Formats an [`UnboundDecl`](wdl_ast::v1::UnboundDecl). 
-pub fn format_unbound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    for child in element.children().expect("unbound decl children") {
-        (&child).write(stream);
-        stream.end_word();
-    }
-    stream.end_line();
-}
-
-/// Formats a [`BoundDecl`](wdl_ast::v1::BoundDecl).
-pub fn format_bound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    for child in element.children().expect("bound decl children") {
-        (&child).write(stream);
-        stream.end_word();
-    }
-    stream.end_line();
-}
-
 /// Formats an [`InputSection`](wdl_ast::v1::InputSection).
 pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
     let mut children = element.children().expect("input section children");
@@ -190,185 +115,6 @@ pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream<P
-/// Formats a [`MetadataArray`](wdl_ast::v1::MetadataArray).
-pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    let mut children = element.children().expect("metadata array children");
-
-    let open_bracket = children.next().expect("metadata array open bracket");
-    assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket);
-    (&open_bracket).write(stream);
-    stream.increment_indent();
-
-    let mut close_bracket = None;
-    let mut commas = Vec::new();
-    let items = children
-        .filter(|child| {
-            if child.element().kind() == SyntaxKind::CloseBracket {
-                close_bracket = Some(child.to_owned());
-                false
-            } else if child.element().kind() == SyntaxKind::Comma {
-                commas.push(child.to_owned());
-                false
-            } else {
-                true
-            }
-        })
-        .collect::<Vec<_>>();
-
-    let mut commas = commas.iter();
-    for item in items {
-        (&item).write(stream);
-        if let Some(comma) = commas.next() {
-            (comma).write(stream);
-        } else {
-            stream.push_literal(",".to_string(), SyntaxKind::Comma);
-        }
-        stream.end_line();
-    }
-
-    stream.decrement_indent();
-    (&close_bracket.expect("metadata array close bracket")).write(stream);
-}
-
-/// Formats a [`MetadataObject`](wdl_ast::v1::MetadataObject).
-pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    let mut children = element.children().expect("metadata object children");
-
-    let open_brace = children.next().expect("metadata object open brace");
-    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
-    (&open_brace).write(stream);
-    stream.increment_indent();
-
-    let mut close_brace = None;
-    let mut commas = Vec::new();
-    let items = children
-        .filter(|child| {
-            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
-                true
-            } else if child.element().kind() == SyntaxKind::Comma {
-                commas.push(child.to_owned());
-                false
-            } else {
-                assert!(child.element().kind() == SyntaxKind::CloseBrace);
-                close_brace = Some(child.to_owned());
-                false
-            }
-        })
-        .collect::<Vec<_>>();
-
-    let mut commas = commas.iter();
-    for item in items {
-        (&item).write(stream);
-        if let Some(comma) = commas.next() {
-            (comma).write(stream);
-        } else {
-            stream.push_literal(",".to_string(), SyntaxKind::Comma);
-        }
-        stream.end_line();
-    }
-
-    stream.decrement_indent();
-    (&close_brace.expect("metadata object close brace")).write(stream);
-}
-
-/// Formats a [`MetadataObjectItem`](wdl_ast::v1::MetadataObjectItem).
-pub fn format_metadata_object_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    let mut children = element.children().expect("metadata object item children");
-
-    let key = children.next().expect("metadata object item key");
-    assert!(key.element().kind() == SyntaxKind::Ident);
-    (&key).write(stream);
-
-    let colon = children.next().expect("metadata object item colon");
-    assert!(colon.element().kind() == SyntaxKind::Colon);
-    (&colon).write(stream);
-    stream.end_word();
-
-    let value = children.next().expect("metadata object item value");
-    (&value).write(stream);
-
-    assert!(children.next().is_none());
-}
-
-/// Formats a [MetadataSection](wdl_ast::v1::MetadataSection).
-pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
-    let mut children = element.children().expect("meta section children");
-
-    let meta_keyword = children.next().expect("meta keyword");
-    assert!(meta_keyword.element().kind() == SyntaxKind::MetaKeyword);
-    (&meta_keyword).write(stream);
-    stream.end_word();
-
-    let open_brace = children.next().expect("metadata section open brace");
-    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
-    (&open_brace).write(stream);
-    stream.increment_indent();
-
-    let mut close_brace = None;
-    let items = children
-        .filter(|child| {
-            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
-                true
-            } else {
-                assert!(child.element().kind() == SyntaxKind::CloseBrace);
-                close_brace = Some(child.to_owned());
-                false
-            }
-        })
-        .collect::<Vec<_>>();
-
-    for item in items {
-        (&item).write(stream);
-        stream.end_line();
-    }
-
-    stream.decrement_indent();
-    (&close_brace.expect("metadata section close brace")).write(stream);
-    stream.end_line();
-}
-
-/// Formats a [`ParameterMetadataSection`](wdl_ast::v1::ParameterMetadataSection).
-pub fn format_parameter_metadata_section(
-    element: &FormatElement,
-    stream: &mut TokenStream<PreToken>,
-) {
-    let mut children = element.children().expect("parameter meta section children");
-
-    let parameter_meta_keyword = children.next().expect("parameter meta keyword");
-    assert!(parameter_meta_keyword.element().kind() == SyntaxKind::ParameterMetaKeyword);
-    (&parameter_meta_keyword).write(stream);
-    stream.end_word();
-
-    let open_brace = children
-        .next()
-        .expect("parameter metadata section open brace");
-    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
-    (&open_brace).write(stream);
-    stream.increment_indent();
-
-    let mut close_brace = None;
-    let items = children
-        .filter(|child| {
-            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
-                true
-            } else {
-                assert!(child.element().kind() == SyntaxKind::CloseBrace);
-                close_brace = Some(child.to_owned());
-                false
-            }
-        })
-        .collect::<Vec<_>>();
-
-    for item in items {
-        (&item).write(stream);
-        stream.end_line();
-    }
-
-    stream.decrement_indent();
-    (&close_brace.expect("parameter metadata section close brace")).write(stream);
-    stream.end_line();
-}
-
 /// Formats an [`OutputSection`](wdl_ast::v1::OutputSection).
 pub fn format_output_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
     let mut children = element.children().expect("output section children");
diff --git a/wdl-format/src/v1/decl.rs b/wdl-format/src/v1/decl.rs
new file mode 100644
index 000000000..6b1499425
--- /dev/null
+++ b/wdl-format/src/v1/decl.rs
@@ -0,0 +1,77 @@
+//! Formatting functions for declarations.
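+//!
+//! These functions were previously defined in `v1.rs` (see the removals
+//! above); they are moved here unchanged as part of splitting the v1
+//! formatter into focused submodules.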
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`PrimitiveType`](wdl_ast::v1::PrimitiveType).
+pub fn format_primitive_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("primitive type children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats an [`ArrayType`](wdl_ast::v1::ArrayType).
+pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("array type children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`MapType`](wdl_ast::v1::MapType).
+pub fn format_map_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("map type children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`ObjectType`](wdl_ast::v1::ObjectType).
+pub fn format_object_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("object type children");
+    let object_keyword = children.next().expect("object type object keyword");
+    assert!(object_keyword.element().kind() == SyntaxKind::ObjectTypeKeyword);
+    (&object_keyword).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`PairType`](wdl_ast::v1::PairType).
+pub fn format_pair_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("pair type children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`TypeRef`](wdl_ast::v1::TypeRef).
+pub fn format_type_ref(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("type ref children");
+    let t = children.next().expect("type ref type");
+    (&t).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats an [`UnboundDecl`](wdl_ast::v1::UnboundDecl).
+pub fn format_unbound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("unbound decl children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+    stream.end_line();
+}
+
+/// Formats a [`BoundDecl`](wdl_ast::v1::BoundDecl).
+pub fn format_bound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("bound decl children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+    stream.end_line();
+}
diff --git a/wdl-format/src/v1/meta.rs b/wdl-format/src/v1/meta.rs
new file mode 100644
index 000000000..e6fa8daa4
--- /dev/null
+++ b/wdl-format/src/v1/meta.rs
@@ -0,0 +1,195 @@
+//! Formatting functions for meta and parameter_meta sections.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`LiteralNull`](wdl_ast::v1::LiteralNull).
+pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal null children");
+    let null = children.next().expect("literal null token");
+    (&null).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`MetadataArray`](wdl_ast::v1::MetadataArray).
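+///
+/// Each array element is written on its own line, one indent level past the
+/// opening `[`, and always ends with a comma: the comma from the source is
+/// reused when present, otherwise one is synthesized via `push_literal`.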
+pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("metadata array children");
+
+    let open_bracket = children.next().expect("metadata array open bracket");
+    assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket);
+    (&open_bracket).write(stream);
+    stream.increment_indent();
+
+    let mut close_bracket = None;
+    let mut commas = Vec::new();
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::CloseBracket {
+                close_bracket = Some(child.to_owned());
+                false
+            } else if child.element().kind() == SyntaxKind::Comma {
+                commas.push(child.to_owned());
+                false
+            } else {
+                true
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_bracket.expect("metadata array close bracket")).write(stream);
+}
+
+/// Formats a [`MetadataObject`](wdl_ast::v1::MetadataObject).
+pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("metadata object children");
+
+    let open_brace = children.next().expect("metadata object open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut close_brace = None;
+    let mut commas = Vec::new();
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
+                true
+            } else if child.element().kind() == SyntaxKind::Comma {
+                commas.push(child.to_owned());
+                false
+            } else {
+                assert!(child.element().kind() == SyntaxKind::CloseBrace);
+                close_brace = Some(child.to_owned());
+                false
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("metadata object close brace")).write(stream);
+}
+
+/// Formats a [`MetadataObjectItem`](wdl_ast::v1::MetadataObjectItem).
+pub fn format_metadata_object_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("metadata object item children");
+
+    let key = children.next().expect("metadata object item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("metadata object item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    let value = children.next().expect("metadata object item value");
+    (&value).write(stream);
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [MetadataSection](wdl_ast::v1::MetadataSection).
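+///
+/// Writes the `meta` keyword, a space, and the opening `{`, then each
+/// `MetadataObjectItemNode` child on its own indented line; unlike metadata
+/// objects and arrays, no commas are emitted between items.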
+pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("meta section children");
+
+    let meta_keyword = children.next().expect("meta keyword");
+    assert!(meta_keyword.element().kind() == SyntaxKind::MetaKeyword);
+    (&meta_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("metadata section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut close_brace = None;
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
+                true
+            } else {
+                assert!(child.element().kind() == SyntaxKind::CloseBrace);
+                close_brace = Some(child.to_owned());
+                false
+            }
+        })
+        .collect::<Vec<_>>();
+
+    for item in items {
+        (&item).write(stream);
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("metadata section close brace")).write(stream);
+    stream.end_line();
+}
+
+/// Formats a [`ParameterMetadataSection`](wdl_ast::v1::ParameterMetadataSection).
+pub fn format_parameter_metadata_section(
+    element: &FormatElement,
+    stream: &mut TokenStream<PreToken>,
+) {
+    let mut children = element.children().expect("parameter meta section children");
+
+    let parameter_meta_keyword = children.next().expect("parameter meta keyword");
+    assert!(parameter_meta_keyword.element().kind() == SyntaxKind::ParameterMetaKeyword);
+    (&parameter_meta_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children
+        .next()
+        .expect("parameter metadata section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut close_brace = None;
+    let items = children
+        .filter(|child| {
+            if child.element().kind() == SyntaxKind::MetadataObjectItemNode {
+                true
+            } else {
+                assert!(child.element().kind() == SyntaxKind::CloseBrace);
+                close_brace = Some(child.to_owned());
+                false
+            }
+        })
+        .collect::<Vec<_>>();
+
+    for item in items {
+        (&item).write(stream);
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("parameter metadata section close brace")).write(stream);
+    stream.end_line();
+}

From 1aee37198a22d2b1cc89edb69b30d6b20aa6a72a Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Fri, 11 Oct 2024 12:14:23 -0400
Subject: [PATCH 21/60] feat: line split if/then/else exprs

Known issue: trailing commas can end up on their own line.
---
 wdl-format/src/token/pre.rs                 |  10 +
 wdl-format/src/v1/expr.rs                   |  27 +-
 .../source.formatted.wdl                    | 890 ++++++++++++------
 .../format/seaseq-case/source.formatted.wdl |  33 +-
 4 files changed, 648 insertions(+), 312 deletions(-)

diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs
index 0455a8940..c9ada290a 100644
--- a/wdl-format/src/token/pre.rs
+++ b/wdl-format/src/token/pre.rs
@@ -212,4 +212,14 @@ impl TokenStream<PreToken> {
     pub fn push_literal(&mut self, value: String, kind: SyntaxKind) {
         self.0.push(PreToken::Literal(value, kind));
     }
+
+    /// Returns the kind of the last literal token in the stream.
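+    ///
+    /// Only the last three tokens are inspected, which is enough to see a
+    /// literal through the word/line markers that may trail it (an assumption
+    /// that holds for the `else`-keyword check this helper was added for;
+    /// see `format_if_expr` below).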
+ pub fn last_literal_kind(&self) -> Option { + match self.0.last_chunk::<3>() { + Some([_, _, PreToken::Literal(_, kind)]) => Some(*kind), + Some([_, PreToken::Literal(_, kind), _]) => Some(*kind), + Some([PreToken::Literal(_, kind), _, _]) => Some(*kind), + _ => None, + } + } } diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index 5ea2e7a5d..4c6005012 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -357,11 +357,10 @@ pub fn format_literal_map(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("if expr children"); + let nested_else_if = match stream.last_literal_kind() { + Some(SyntaxKind::ElseKeyword) => true, + _ => false, + }; + let if_keyword = children.next().expect("if keyword"); assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); (&if_keyword).write(stream); @@ -677,13 +680,21 @@ pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream 1 then pool_blacklist.ta_pooled else if length(blacklists) > 0 then blacklists[0] else blacklist2_ + File? blacklist_ = if length(blacklists) > 1 + then pool_blacklist.ta_pooled + else if length(blacklists) > 0 then blacklists[0] + else blacklist2_ String mito_chr_name_ = select_first([ mito_chr_name, read_genome_tsv.mito_chr_name, @@ -1112,105 +1121,145 @@ workflow chip { ]) ### temp vars (do not define these) - String aligner_ = if defined(custom_align_py) then "custom" else aligner - String peak_caller_ = if pipeline_type == "tf" then select_first([ - peak_caller, - "spp", - ]) else select_first([ - peak_caller, - "macs2", - ]) - String peak_type_ = if peak_caller_ == "spp" then "regionPeak" else "narrowPeak" + String aligner_ = if defined(custom_align_py) + then "custom" + else aligner + String peak_caller_ = if pipeline_type == "tf" + then select_first([ + peak_caller, + "spp", + ]) + else select_first([ + peak_caller, + "macs2", + ]) + String peak_type_ = if peak_caller_ == "spp" + then "regionPeak" + else "narrowPeak" Boolean enable_idr = pipeline_type == "tf" # enable_idr for TF chipseq only - String idr_rank_ = if peak_caller_ == "spp" then "signal.value" else if peak_caller_ == "macs2" then "p.value" else "p.value" + String idr_rank_ = if peak_caller_ == "spp" + then "signal.value" + else if peak_caller_ == "macs2" then "p.value" + else "p.value" Int cap_num_peak_spp = 300000 Int cap_num_peak_macs2 = 500000 - Int cap_num_peak_ = if peak_caller_ == "spp" then select_first([ - cap_num_peak, - cap_num_peak_spp, - ]) else select_first([ - cap_num_peak, - cap_num_peak_macs2, - ]) + Int cap_num_peak_ = if peak_caller_ == "spp" + then select_first([ + cap_num_peak, + cap_num_peak_spp, + ]) + else select_first([ + cap_num_peak, + cap_num_peak_macs2, + ]) Int mapq_thresh_ = mapq_thresh - Boolean enable_xcor_ = if pipeline_type == "control" then false else true - Boolean enable_count_signal_track_ = if pipeline_type == "control" then false else enable_count_signal_track - Boolean enable_jsd_ = if pipeline_type == "control" then false else enable_jsd - Boolean enable_gc_bias_ = if pipeline_type == "control" then false else enable_gc_bias - Boolean align_only_ = if pipeline_type == "control" then true else align_only - - Float align_mem_factor_ = if aligner_ == "bowtie2" then align_bowtie2_mem_factor else align_bwa_mem_factor - Float align_disk_factor_ = if aligner_ == "bowtie2" then align_bowtie2_disk_factor else align_bwa_disk_factor - Float call_peak_mem_factor_ = if peak_caller_ == "spp" then call_peak_spp_mem_factor else 
call_peak_macs2_mem_factor - Float call_peak_disk_factor_ = if peak_caller_ == "spp" then call_peak_spp_disk_factor else call_peak_macs2_disk_factor + Boolean enable_xcor_ = if pipeline_type == "control" + then false + else true + Boolean enable_count_signal_track_ = if pipeline_type == "control" + then false + else enable_count_signal_track + Boolean enable_jsd_ = if pipeline_type == "control" + then false + else enable_jsd + Boolean enable_gc_bias_ = if pipeline_type == "control" + then false + else enable_gc_bias + Boolean align_only_ = if pipeline_type == "control" + then true + else align_only + + Float align_mem_factor_ = if aligner_ == "bowtie2" + then align_bowtie2_mem_factor + else align_bwa_mem_factor + Float align_disk_factor_ = if aligner_ == "bowtie2" + then align_bowtie2_disk_factor + else align_bwa_disk_factor + Float call_peak_mem_factor_ = if peak_caller_ == "spp" + then call_peak_spp_mem_factor + else call_peak_macs2_mem_factor + Float call_peak_disk_factor_ = if peak_caller_ == "spp" + then call_peak_spp_disk_factor + else call_peak_macs2_disk_factor # temporary 2-dim fastqs array [rep_id][merge_id] - Array[Array[File]] fastqs_R1 = if length(fastqs_rep10_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - fastqs_rep9_R1, - fastqs_rep10_R1, - ] else if length(fastqs_rep9_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - fastqs_rep9_R1, - ] else if length(fastqs_rep8_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - ] else if length(fastqs_rep7_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - ] else if length(fastqs_rep6_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - ] else if length(fastqs_rep5_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - ] else if length(fastqs_rep4_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - ] else if length(fastqs_rep3_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - ] else if length(fastqs_rep2_R1) > 0 then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - ] else if length(fastqs_rep1_R1) > 0 then [ - fastqs_rep1_R1, - ] else [] + Array[Array[File]] fastqs_R1 = if length(fastqs_rep10_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + fastqs_rep10_R1, + ] + else if length(fastqs_rep9_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + ] + else if length(fastqs_rep8_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + ] + else if length(fastqs_rep7_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + ] + else if length(fastqs_rep6_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + 
fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + ] + else if length(fastqs_rep5_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + ] + else if length(fastqs_rep4_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + ] + else if length(fastqs_rep3_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + ] + else if length(fastqs_rep2_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + ] + else if length(fastqs_rep1_R1) > 0 then [ + fastqs_rep1_R1, + ] + else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] fastqs_R2 = [ fastqs_rep1_R2, @@ -1226,72 +1275,83 @@ workflow chip { ] # temporary 2-dim ctl fastqs array [rep_id][merge_id] - Array[Array[File]] ctl_fastqs_R1 = if length(ctl_fastqs_rep10_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ctl_fastqs_rep9_R1, - ctl_fastqs_rep10_R1, - ] else if length(ctl_fastqs_rep9_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ctl_fastqs_rep9_R1, - ] else if length(ctl_fastqs_rep8_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ] else if length(ctl_fastqs_rep7_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ] else if length(ctl_fastqs_rep6_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ] else if length(ctl_fastqs_rep5_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ] else if length(ctl_fastqs_rep4_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ] else if length(ctl_fastqs_rep3_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ] else if length(ctl_fastqs_rep2_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ] else if length(ctl_fastqs_rep1_R1) > 0 then [ - ctl_fastqs_rep1_R1, - ] else [] + Array[Array[File]] ctl_fastqs_R1 = if length(ctl_fastqs_rep10_R1) > 0 + then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ctl_fastqs_rep10_R1, + ] + else if length(ctl_fastqs_rep9_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ] + else if length(ctl_fastqs_rep8_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ] + else if length(ctl_fastqs_rep7_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + 
ctl_fastqs_rep7_R1, + ] + else if length(ctl_fastqs_rep6_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ] + else if length(ctl_fastqs_rep5_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ] + else if length(ctl_fastqs_rep4_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ] + else if length(ctl_fastqs_rep3_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ] + else if length(ctl_fastqs_rep2_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ] + else if length(ctl_fastqs_rep1_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ] + else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] ctl_fastqs_R2 = [ ctl_fastqs_rep1_R2, @@ -1309,17 +1369,31 @@ workflow chip { # temporary variables to get number of replicates # WDLic implementation of max(A,B,C,...) Int num_rep_fastq = length(fastqs_R1) - Int num_rep_bam = if length(bams) < num_rep_fastq then num_rep_fastq else length(bams) - Int num_rep_nodup_bam = if length(nodup_bams) < num_rep_bam then num_rep_bam else length(nodup_bams) - Int num_rep_ta = if length(tas) < num_rep_nodup_bam then num_rep_nodup_bam else length(tas) - Int num_rep_peak = if length(peaks) < num_rep_ta then num_rep_ta else length(peaks) + Int num_rep_bam = if length(bams) < num_rep_fastq + then num_rep_fastq + else length(bams) + Int num_rep_nodup_bam = if length(nodup_bams) < num_rep_bam + then num_rep_bam + else length(nodup_bams) + Int num_rep_ta = if length(tas) < num_rep_nodup_bam + then num_rep_nodup_bam + else length(tas) + Int num_rep_peak = if length(peaks) < num_rep_ta + then num_rep_ta + else length(peaks) Int num_rep = num_rep_peak # temporary variables to get number of controls Int num_ctl_fastq = length(ctl_fastqs_R1) - Int num_ctl_bam = if length(ctl_bams) < num_ctl_fastq then num_ctl_fastq else length(ctl_bams) - Int num_ctl_nodup_bam = if length(ctl_nodup_bams) < num_ctl_bam then num_ctl_bam else length(ctl_nodup_bams) - Int num_ctl_ta = if length(ctl_tas) < num_ctl_nodup_bam then num_ctl_nodup_bam else length(ctl_tas) + Int num_ctl_bam = if length(ctl_bams) < num_ctl_fastq + then num_ctl_fastq + else length(ctl_bams) + Int num_ctl_nodup_bam = if length(ctl_nodup_bams) < num_ctl_bam + then num_ctl_bam + else length(ctl_nodup_bams) + Int num_ctl_ta = if length(ctl_tas) < num_ctl_nodup_bam + then num_ctl_nodup_bam + else length(ctl_tas) Int num_ctl = num_ctl_ta # sanity check for inputs @@ -1383,16 +1457,21 @@ workflow chip { scatter (i in range(num_rep)) { # to override endedness definition for individual replicate # paired_end will override paired_ends[i] - Boolean paired_end_ = if !defined(paired_end) && i < length(paired_ends) then paired_ends[i] else select_first([ - paired_end, - ]) + Boolean paired_end_ = if !defined(paired_end) && i < length(paired_ends) + then paired_ends[i] + else select_first([ + paired_end, + ]) Boolean has_input_of_align = i < length(fastqs_R1) && length(fastqs_R1[i]) > 0 Boolean has_output_of_align = i < length(bams) if (has_input_of_align && !has_output_of_align) { call align { input: fastqs_R1 = fastqs_R1[i], - fastqs_R2 = if paired_end_ then fastqs_R2[i] else [], + fastqs_R2 = if paired_end_ + then fastqs_R2[i] + else [] + , crop_length = crop_length, crop_length_tol = crop_length_tol, 
trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1400,7 +1479,11 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + idx_tar = if aligner == "bwa" + then bwa_idx_tar_ + else if aligner == "bowtie2" then bowtie2_idx_tar_ + else custom_aligner_idx_tar + , paired_end = paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1415,7 +1498,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? bam_ = if has_output_of_align then bams[i] else align.bam + File? bam_ = if has_output_of_align + then bams[i] + else align.bam Boolean has_input_of_filter = has_output_of_align || defined(align.bam) Boolean has_output_of_filter = i < length(nodup_bams) @@ -1441,7 +1526,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? nodup_bam_ = if has_output_of_filter then nodup_bams[i] else filter.nodup_bam + File? nodup_bam_ = if has_output_of_filter + then nodup_bams[i] + else filter.nodup_bam Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam) Boolean has_output_of_bam2ta = i < length(tas) @@ -1459,7 +1546,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ta_ = if has_output_of_bam2ta then tas[i] else bam2ta.ta + File? ta_ = if has_output_of_bam2ta + then tas[i] + else bam2ta.ta Boolean has_input_of_spr = has_output_of_bam2ta || defined(bam2ta.ta) if (has_input_of_spr && !align_only_ && !true_rep_only) { @@ -1505,7 +1594,11 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + idx_tar = if aligner == "bwa" + then bwa_idx_tar_ + else if aligner == "bowtie2" then bowtie2_idx_tar_ + else custom_aligner_idx_tar + , paired_end = false, use_bwa_mem_for_pe = false, bwa_mem_read_len_limit = 0, @@ -1590,8 +1683,13 @@ workflow chip { # if not starting from fastqs, keep using old method # (mapping with both ends for tag-aligns to be used for xcor) # subsample tagalign (non-mito) and cross-correlation analysis - File? ta_xcor = if defined(bam2ta_no_dedup_R1.ta) then bam2ta_no_dedup_R1.ta else if defined(bam2ta_no_dedup.ta) then bam2ta_no_dedup.ta else ta_ - Boolean paired_end_xcor = if defined(bam2ta_no_dedup_R1.ta) then false else paired_end_ + File? ta_xcor = if defined(bam2ta_no_dedup_R1.ta) + then bam2ta_no_dedup_R1.ta + else if defined(bam2ta_no_dedup.ta) then bam2ta_no_dedup.ta + else ta_ + Boolean paired_end_xcor = if defined(bam2ta_no_dedup_R1.ta) + then false + else paired_end_ Boolean has_input_of_xcor = defined(ta_xcor) if (has_input_of_xcor && enable_xcor_) { @@ -1613,24 +1711,31 @@ workflow chip { # before peak calling, get fragment length from xcor analysis or given input # if fraglen [] is defined in the input JSON, fraglen from xcor will be ignored - Int? fraglen_ = if i < length(fraglen) then fraglen[i] else xcor.fraglen + Int? 
fraglen_ = if i < length(fraglen) + then fraglen[i] + else xcor.fraglen } # align each control scatter (i in range(num_ctl)) { # to override endedness definition for individual control # ctl_paired_end will override ctl_paired_ends[i] - Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i < length(ctl_paired_ends) then ctl_paired_ends[i] else select_first([ - ctl_paired_end, - paired_end, - ]) + Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i < length(ctl_paired_ends) + then ctl_paired_ends[i] + else select_first([ + ctl_paired_end, + paired_end, + ]) Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i]) > 0 Boolean has_output_of_align_ctl = i < length(ctl_bams) if (has_input_of_align_ctl && !has_output_of_align_ctl) { call align as align_ctl { input: fastqs_R1 = ctl_fastqs_R1[i], - fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [], + fastqs_R2 = if ctl_paired_end_ + then ctl_fastqs_R2[i] + else [] + , crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1638,7 +1743,11 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" then bowtie2_idx_tar_ else custom_aligner_idx_tar, + idx_tar = if aligner == "bwa" + then bwa_idx_tar_ + else if aligner == "bowtie2" then bowtie2_idx_tar_ + else custom_aligner_idx_tar + , paired_end = ctl_paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1653,7 +1762,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_bam_ = if has_output_of_align_ctl then ctl_bams[i] else align_ctl.bam + File? ctl_bam_ = if has_output_of_align_ctl + then ctl_bams[i] + else align_ctl.bam Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam) Boolean has_output_of_filter_ctl = i < length(ctl_nodup_bams) @@ -1679,7 +1790,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_nodup_bam_ = if has_output_of_filter_ctl then ctl_nodup_bams[i] else filter_ctl.nodup_bam + File? ctl_nodup_bam_ = if has_output_of_filter_ctl + then ctl_nodup_bams[i] + else filter_ctl.nodup_bam Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam) Boolean has_output_of_bam2ta_ctl = i < length(ctl_tas) @@ -1697,7 +1810,9 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_ta_ = if has_output_of_bam2ta_ctl then ctl_tas[i] else bam2ta_ctl.ta + File? 
ctl_ta_ = if has_output_of_bam2ta_ctl + then ctl_tas[i] + else bam2ta_ctl.ta } # if there are TAs for ALL replicates then pool them @@ -1795,17 +1910,27 @@ workflow chip { # >=0: control TA index (this means that control TA with this index exists) # -1: use pooled control # -2: there is no control - Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then select_first([ - choose_ctl.chosen_ctl_ta_ids, - ])[i] else -2 - Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first([ - choose_ctl.chosen_ctl_ta_subsample, - ])[i] else 0 - Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] else ctl_paired_end_[chosen_ctl_ta_id] + Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_ids, + ])[i] + else -2 + Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_subsample, + ])[i] + else 0 + Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 + then false + else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] + else ctl_paired_end_[chosen_ctl_ta_id] if (chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0) { call subsample_ctl { input: - ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled else ctl_ta_[chosen_ctl_ta_id], + ta = if chosen_ctl_ta_id == -1 + then pool_ta_ctl.ta_pooled + else ctl_ta_[chosen_ctl_ta_id] + , subsample = chosen_ctl_ta_subsample, paired_end = chosen_ctl_paired_end, mem_factor = subsample_ctl_mem_factor, @@ -1813,23 +1938,29 @@ workflow chip { runtime_environment = runtime_environment, } } - Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] else if chosen_ctl_ta_subsample > 0 then [ - select_first([ - subsample_ctl.ta_subsampled, - ]), - ] else if chosen_ctl_ta_id == -1 then [ - select_first([ - pool_ta_ctl.ta_pooled, - ]), - ] else [ - select_first([ - ctl_ta_[chosen_ctl_ta_id], - ]), - ] + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 + then [] + else if chosen_ctl_ta_subsample > 0 then [ + select_first([ + subsample_ctl.ta_subsampled, + ]), + ] + else if chosen_ctl_ta_id == -1 then [ + select_first([ + pool_ta_ctl.ta_pooled, + ]), + ] + else [ + select_first([ + ctl_ta_[chosen_ctl_ta_id], + ]), + ] } - Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first([ - choose_ctl.chosen_ctl_ta_subsample_pooled, - ]) else 0 + Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_subsample_pooled, + ]) + else 0 # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) Array[Int] fraglen_tmp = select_all(fraglen_) @@ -1861,10 +1992,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_ = if has_output_of_call_peak then peaks[i] else call_peak.peak + File? 
peak_ = if has_output_of_call_peak + then peaks[i] + else call_peak.peak # signal track if (has_input_of_call_peak && !align_only_) { @@ -1913,10 +2050,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_pr1_ = if has_output_of_call_peak_pr1 then peaks_pr1[i] else call_peak_pr1.peak + File? peak_pr1_ = if has_output_of_call_peak_pr1 + then peaks_pr1[i] + else call_peak_pr1.peak # call peaks on 2nd pseudo replicated tagalign Boolean has_input_of_call_peak_pr2 = defined(spr.ta_pr2[i]) @@ -1944,10 +2087,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_pr2_ = if has_output_of_call_peak_pr2 then peaks_pr2[i] else call_peak_pr2.peak + File? peak_pr2_ = if has_output_of_call_peak_pr2 + then peaks_pr2[i] + else call_peak_pr2.peak } # if ( !align_only_ && num_rep > 1 ) { @@ -1962,7 +2111,10 @@ workflow chip { if (has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0) { call subsample_ctl as subsample_ctl_pooled { input: - ta = if num_ctl < 2 then ctl_ta_[0] else pool_ta_ctl.ta_pooled, + ta = if num_ctl < 2 + then ctl_ta_[0] + else pool_ta_ctl.ta_pooled + , subsample = chosen_ctl_ta_pooled_subsample, paired_end = ctl_paired_end_[0], mem_factor = subsample_ctl_mem_factor, @@ -1971,13 +2123,17 @@ workflow chip { } } # actually not an array - Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] else if chosen_ctl_ta_pooled_subsample > 0 then [ - subsample_ctl_pooled.ta_subsampled, - ] else if num_ctl < 2 then [ - ctl_ta_[0], - ] else [ - pool_ta_ctl.ta_pooled, - ] + Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ + then [] + else if chosen_ctl_ta_pooled_subsample > 0 then [ + subsample_ctl_pooled.ta_subsampled, + ] + else if num_ctl < 2 then [ + ctl_ta_[0], + ] + else [ + pool_ta_ctl.ta_pooled, + ] Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) Boolean has_output_of_call_peak_pooled = defined(peak_pooled) @@ -2006,10 +2162,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled else call_peak_pooled.peak + File? 
peak_pooled_ = if has_output_of_call_peak_pooled + then peak_pooled + else call_peak_pooled.peak # macs2 signal track for pooled rep if (has_input_of_call_peak_pooled && !align_only_ && num_rep > 1) { @@ -2058,10 +2220,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 else call_peak_ppr1.peak + File? peak_ppr1_ = if has_output_of_call_peak_ppr1 + then peak_ppr1 + else call_peak_ppr1.peak Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) @@ -2089,10 +2257,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, + runtime_environment = if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 + else runtime_environment + , } } - File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 else call_peak_ppr2.peak + File? peak_ppr2_ = if has_output_of_call_peak_ppr2 + then peak_ppr2 + else call_peak_ppr2.peak # do IDR/overlap on all pairs of two replicates (i,j) # where i and j are zero-based indices and 0 <= i < j < num_rep @@ -2219,9 +2393,12 @@ workflow chip { call reproducibility as reproducibility_overlap { input: prefix = "overlap", peaks = select_all(overlap.bfilt_overlap_peak), - peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([ - overlap_pr.bfilt_overlap_peak, - ]) else [], + peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) + then select_first([ + overlap_pr.bfilt_overlap_peak, + ]) + else [] + , peak_ppr = overlap_ppr.bfilt_overlap_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2234,9 +2411,12 @@ workflow chip { call reproducibility as reproducibility_idr { input: prefix = "idr", peaks = select_all(idr.bfilt_idr_peak), - peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([ - idr_pr.bfilt_idr_peak, - ]) else [], + peaks_pr = if defined(idr_pr.bfilt_idr_peak) + then select_first([ + idr_pr.bfilt_idr_peak, + ]) + else [] + , peak_ppr = idr_ppr.bfilt_idr_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2275,9 +2455,12 @@ workflow chip { ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), jsd_plot = jsd.plot, - jsd_qcs = if defined(jsd.jsd_qcs) then select_first([ - jsd.jsd_qcs, - ]) else [], + jsd_qcs = if defined(jsd.jsd_qcs) + then select_first([ + jsd.jsd_qcs, + ]) + else [] + , frip_qcs = select_all(call_peak.frip_qc), frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), @@ -2287,19 +2470,28 @@ workflow chip { frip_qc_ppr2 = call_peak_ppr2.frip_qc, idr_plots = select_all(idr.idr_plot), - idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([ - idr_pr.idr_plot, - ]) else [], + idr_plots_pr = if defined(idr_pr.idr_plot) + then select_first([ + idr_pr.idr_plot, + ]) + else [] + , idr_plot_ppr = idr_ppr.idr_plot, frip_idr_qcs = select_all(idr.frip_qc), - frip_idr_qcs_pr = 
if defined(idr_pr.frip_qc) then select_first([ - idr_pr.frip_qc, - ]) else [], + frip_idr_qcs_pr = if defined(idr_pr.frip_qc) + then select_first([ + idr_pr.frip_qc, + ]) + else [] + , frip_idr_qc_ppr = idr_ppr.frip_qc, frip_overlap_qcs = select_all(overlap.frip_qc), - frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([ - overlap_pr.frip_qc, - ]) else [], + frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) + then select_first([ + overlap_pr.frip_qc, + ]) + else [] + , frip_overlap_qc_ppr = overlap_ppr.frip_qc, idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, @@ -2364,12 +2556,14 @@ task align { Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) Float trimmomatic_java_heap_factor = 0.9 - Array[Array[File]] tmp_fastqs = if paired_end then transpose([ - fastqs_R1, - fastqs_R2, - ]) else transpose([ - fastqs_R1, - ]) + Array[Array[File]] tmp_fastqs = if paired_end + then transpose([ + fastqs_R1, + fastqs_R2, + ]) + else transpose([ + fastqs_R1, + ]) command <<< set -e @@ -2382,7 +2576,10 @@ task align { fi python3 $(which encode_task_merge_fastq.py) \ ~{write_tsv(tmp_fastqs)} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ ~{"--nth " + cpu} if [ -z '~{trim_bp}' ]; then @@ -2406,14 +2603,26 @@ task align { NEW_SUFFIX="$SUFFIX"_cropped python3 $(which encode_task_trimmomatic.py) \ --fastq1 R1$SUFFIX/*.fastq.gz \ - ~{if paired_end then "--fastq2 R2$SUFFIX/*.fastq.gz" else ""} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--fastq2 R2$SUFFIX/*.fastq.gz" + else "" + } \ + ~{if paired_end + then "--paired-end" + else "" + } \ --crop-length ~{crop_length} \ --crop-length-tol "~{crop_length_tol}" \ ~{"--phred-score-format " + trimmomatic_phred_score_format} \ --out-dir-R1 R1$NEW_SUFFIX \ - ~{if paired_end then "--out-dir-R2 R2$NEW_SUFFIX" else ""} \ - ~{"--trimmomatic-java-heap " + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + "G")} \ + ~{if paired_end + then "--out-dir-R2 R2$NEW_SUFFIX" + else "" + } \ + ~{"--trimmomatic-java-heap " + if defined(trimmomatic_java_heap) + then trimmomatic_java_heap + else (round(mem_gb * trimmomatic_java_heap_factor) + "G") + } \ ~{"--nth " + cpu} SUFFIX=$NEW_SUFFIX fi @@ -2422,9 +2631,18 @@ task align { python3 $(which encode_task_bwa.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ - ~{if paired_end then "--paired-end" else ""} \ - ~{if use_bwa_mem_for_pe then "--use-bwa-mem-for-pe" else ""} \ + ~{if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + } \ + ~{if paired_end + then "--paired-end" + else "" + } \ + ~{if use_bwa_mem_for_pe + then "--use-bwa-mem-for-pe" + else "" + } \ ~{"--bwa-mem-read-len-limit " + bwa_mem_read_len_limit} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} @@ -2433,18 +2651,33 @@ task align { python3 $(which encode_task_bowtie2.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + } \ ~{"--multimapping " + multimapping} \ - ~{if paired_end then "--paired-end" else ""} \ - ~{if use_bowtie2_local_mode then "--local" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ + ~{if use_bowtie2_local_mode + then "--local" + else "" + } \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} else python3 
~{custom_align_py} \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + } \ + ~{if paired_end + then "--paired-end" + else "" + } \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} fi @@ -2510,17 +2743,26 @@ task filter { set -e python3 $(which encode_task_filter.py) \ ~{bam} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ --multimapping 0 \ ~{"--dup-marker " + dup_marker} \ ~{"--mapq-thresh " + mapq_thresh} \ --filter-chrs ~{sep=" " filter_chrs} \ ~{"--chrsz " + chrsz} \ - ~{if no_dup_removal then "--no-dup-removal" else ""} \ + ~{if no_dup_removal + then "--no-dup-removal" + else "" + } \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} \ - ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G")} + ~{"--picard-java-heap " + if defined(picard_java_heap) + then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G") + } if [ '~{redact_nodup_bam}' == 'true' ]; then python3 $(which encode_task_bam_to_pbam.py) \ @@ -2575,7 +2817,10 @@ task bam2ta { python3 $(which encode_task_bam2ta.py) \ ~{bam} \ --disable-tn5-shift \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--mem-gb " + samtools_mem_gb} \ @@ -2619,7 +2864,10 @@ task spr { python3 $(which encode_task_spr.py) \ ~{ta} \ ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ - ~{if paired_end then "--paired-end" else ""} + ~{if paired_end + then "--paired-end" + else "" + } >>> output { @@ -2700,7 +2948,10 @@ task xcor { set -e python3 $(which encode_task_xcor.py) \ ~{ta} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--chip-seq-type " + chip_seq_type} \ @@ -2753,7 +3004,10 @@ task jsd { set -e python3 $(which encode_task_jsd.py) \ ~{sep=" " select_all(nodup_bams)} \ - ~{if length(ctl_bams) > 0 then "--ctl-bam " + select_first(ctl_bams) else ""} \ + ~{if length(ctl_bams) > 0 + then "--ctl-bam " + select_first(ctl_bams) + else "" + } \ ~{"--mapq-thresh " + mapq_thresh} \ ~{"--blacklist " + blacklist} \ ~{"--nth " + cpu} @@ -2798,7 +3052,10 @@ task choose_ctl { --ctl-tas ~{sep=" " select_all(ctl_tas)} \ ~{"--ta-pooled " + ta_pooled} \ ~{"--ctl-ta-pooled " + ctl_ta_pooled} \ - ~{if always_use_pooled_ctl then "--always-use-pooled-ctl" else ""} \ + ~{if always_use_pooled_ctl + then "--always-use-pooled-ctl" + else "" + } \ ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ ~{"--ctl-depth-limit " + ctl_depth_limit} \ ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} @@ -2880,7 +3137,10 @@ task subsample_ctl { python3 $(which encode_task_subsample_ctl.py) \ ~{ta} \ ~{"--subsample " + subsample} \ - ~{if paired_end then "--paired-end" else ""} \ + ~{if paired_end + then "--paired-end" + else "" + } \ >>> output { @@ -2975,7 +3235,9 @@ task call_peak { } runtime { - cpu: if peak_caller == "macs2" then 2 else cpu + cpu: if peak_caller == "macs2" + then 2 + else cpu memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" @@ -3057,7 +3319,10 @@ task idr { command <<< set -e - ~{if defined(ta) then "" else 
"touch null.frip.qc"} + ~{if defined(ta) + then "" + else "touch null.frip.qc" + } touch null python3 $(which encode_task_idr.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3082,7 +3347,9 @@ task idr { File idr_plot = glob("*.txt.png")[0] File idr_unthresholded_peak = glob("*.txt.gz")[0] File idr_log = glob("*.idr*.log")[0] - File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + File frip_qc = if defined(ta) + then glob("*.frip.qc")[0] + else glob("null")[0] } runtime { @@ -3116,7 +3383,10 @@ task overlap { command <<< set -e - ~{if defined(ta) then "" else "touch null.frip.qc"} + ~{if defined(ta) + then "" + else "touch null.frip.qc" + } touch null python3 $(which encode_task_overlap.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3137,7 +3407,9 @@ task overlap { File bfilt_overlap_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] File bfilt_overlap_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] - File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + File frip_qc = if defined(ta) + then glob("*.frip.qc")[0] + else glob("null")[0] } runtime { @@ -3228,7 +3500,10 @@ task gc_bias { python3 $(which encode_task_gc_bias.py) \ ~{"--nodup-bam " + nodup_bam} \ ~{"--ref-fa " + ref_fa} \ - ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G")} + ~{"--picard-java-heap " + if defined(picard_java_heap) + then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G") + } >>> output { @@ -3331,7 +3606,10 @@ task qc_report { --ctl-paired-ends ~{sep=" " ctl_paired_ends} \ --pipeline-type ~{pipeline_type} \ --aligner ~{aligner} \ - ~{if (no_dup_removal) then "--no-dup-removal " else ""} \ + ~{if (no_dup_removal) + then "--no-dup-removal " + else "" + } \ --peak-caller ~{peak_caller} \ ~{"--cap-num-peak " + cap_num_peak} \ --idr-thresh ~{idr_thresh} \ @@ -3430,15 +3708,33 @@ task read_genome_tsv { output { String? genome_name = read_string("genome_name") - String? ref_fa = if size("ref_fa") == 0 then null_s else read_string("ref_fa") - String? bwa_idx_tar = if size("bwa_idx_tar") == 0 then null_s else read_string("bwa_idx_tar") - String? bowtie2_idx_tar = if size("bowtie2_idx_tar") == 0 then null_s else read_string("bowtie2_idx_tar") - String? chrsz = if size("chrsz") == 0 then null_s else read_string("chrsz") - String? gensz = if size("gensz") == 0 then null_s else read_string("gensz") - String? blacklist = if size("blacklist") == 0 then null_s else read_string("blacklist") - String? blacklist2 = if size("blacklist2") == 0 then null_s else read_string("blacklist2") - String? mito_chr_name = if size("mito_chr_name") == 0 then null_s else read_string("mito_chr_name") - String? regex_bfilt_peak_chr_name = if size("regex_bfilt_peak_chr_name") == 0 then "chr[\\dXY]+" else read_string("regex_bfilt_peak_chr_name") + String? ref_fa = if size("ref_fa") == 0 + then null_s + else read_string("ref_fa") + String? bwa_idx_tar = if size("bwa_idx_tar") == 0 + then null_s + else read_string("bwa_idx_tar") + String? bowtie2_idx_tar = if size("bowtie2_idx_tar") == 0 + then null_s + else read_string("bowtie2_idx_tar") + String? chrsz = if size("chrsz") == 0 + then null_s + else read_string("chrsz") + String? gensz = if size("gensz") == 0 + then null_s + else read_string("gensz") + String? blacklist = if size("blacklist") == 0 + then null_s + else read_string("blacklist") + String? 
blacklist2 = if size("blacklist2") == 0 + then null_s + else read_string("blacklist2") + String? mito_chr_name = if size("mito_chr_name") == 0 + then null_s + else read_string("mito_chr_name") + String? regex_bfilt_peak_chr_name = if size("regex_bfilt_peak_chr_name") == 0 + then "chr[\\dXY]+" + else read_string("regex_bfilt_peak_chr_name") } runtime { diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl index cdf974454..df595fc04 100644 --- a/wdl-format/tests/format/seaseq-case/source.formatted.wdl +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -282,24 +282,37 @@ workflow seaseq { ### ------------------------------------------------- ### # if multiple fastqfiles are provided - Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false - Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + Boolean multi_fastq = if length(original_fastqfiles) > 1 + then true + else false + Boolean one_fastq = if length(original_fastqfiles) == 1 + then true + else false if (defined(spikein_bowtie_index) || defined(spikein_reference)) { scatter (eachfastq in original_fastqfiles) { call fastqc.fastqc as spikein_indv_fastqc { input: inputfile = eachfastq, - default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC", + default_location = if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" + , } call util.basicfastqstats as spikein_indv_bfs { input: fastqfile = eachfastq, - default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", + default_location = if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + , } call bowtie.spikein_SE as spikein_indv_map { input: fastqfile = eachfastq, index_files = actual_spikein_bowtie_index, metricsfile = spikein_indv_bfs.metrics_out, - default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", + default_location = if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + , } } @@ -392,8 +405,14 @@ workflow seaseq { call samtools.mergebam { input: bamfiles = indv_mapping.sorted_bam, metricsfiles = indv_bfs.metrics_out, - default_location = if defined(results_name) then results_name + "/BAM_files" else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files", - outputfile = if defined(results_name) then results_name + ".sorted.bam" else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam", + default_location = if defined(results_name) + then results_name + "/BAM_files" + else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files" + , + outputfile = if defined(results_name) + then results_name + ".sorted.bam" + else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam" + , } call 
fastqc.fastqc as mergebamfqc { input: From 6f35e56846332a390b8e47a66d233c571f89a7f2 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 12:41:12 -0400 Subject: [PATCH 22/60] revise: rename BlankLinesContext --- wdl-format/src/token/post.rs | 2 +- wdl-format/src/token/pre.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index aaec2dbc8..3409021ea 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -131,7 +131,7 @@ impl Postprocessor { self.indent_level = self.indent_level.saturating_sub(1); self.end_line(stream); } - PreToken::BlankLinesContext(policy) => { + PreToken::LineSpacingPolicy(policy) => { self.blank_lines_allowed = policy; } PreToken::Literal(value, kind) => { diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index c9ada290a..05f35ad57 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -34,8 +34,8 @@ pub enum PreToken { /// The end of an indented block. IndentEnd, - /// The context for blank lines. - BlankLinesContext(LineSpacingPolicy), + /// How to handle blank lines from this point onwards. + LineSpacingPolicy(LineSpacingPolicy), /// Literal text. Literal(String, SyntaxKind), @@ -55,8 +55,8 @@ impl std::fmt::Display for PreToken { PreToken::WordEnd => write!(f, ""), PreToken::IndentStart => write!(f, ""), PreToken::IndentEnd => write!(f, ""), - PreToken::BlankLinesContext(context) => { - write!(f, "{:?}", context) + PreToken::LineSpacingPolicy(policy) => { + write!(f, "{:?}", policy) } PreToken::Literal(value, kind) => { write!( @@ -138,12 +138,12 @@ impl TokenStream { /// Inserts a blank lines allowed context change. pub fn blank_lines_allowed(&mut self) { self.0 - .push(PreToken::BlankLinesContext(LineSpacingPolicy::Yes)); + .push(PreToken::LineSpacingPolicy(LineSpacingPolicy::Yes)); } /// Inserts a blank lines allowed between comments context change. pub fn blank_lines_allowed_between_comments(&mut self) { - self.0.push(PreToken::BlankLinesContext( + self.0.push(PreToken::LineSpacingPolicy( LineSpacingPolicy::BetweenComments, )); } From 58eeff31114a7a011c782e6db8f89b2092e35711 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:17:28 -0400 Subject: [PATCH 23/60] feat: implement LineSpacingPolicy --- wdl-format/src/token/post.rs | 68 ++++++++++-------- wdl-format/src/v1.rs | 1 + wdl-format/src/v1/task.rs | 9 +-- wdl-format/src/v1/workflow.rs | 4 ++ .../source.formatted.wdl | 72 ------------------- .../source.formatted.wdl | 1 - .../source.formatted.wdl | 1 - .../interrupt_example/source.formatted.wdl | 1 + .../format/seaseq-case/source.formatted.wdl | 15 ---- 9 files changed, 51 insertions(+), 121 deletions(-) diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index 3409021ea..ae22117eb 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -78,7 +78,10 @@ pub struct Postprocessor { interrupted: bool, /// Whether blank lines are allowed in the current context. - blank_lines_allowed: LineSpacingPolicy, + line_spacing_policy: LineSpacingPolicy, + + /// Whether the last token was a comment. 
+ prior_is_comment: bool, } impl Postprocessor { @@ -108,6 +111,7 @@ impl Postprocessor { match token { PreToken::BlankLine => { self.blank_line(stream); + self.prior_is_comment = false; } PreToken::LineEnd => { self.interrupted = false; @@ -132,7 +136,7 @@ impl Postprocessor { self.end_line(stream); } PreToken::LineSpacingPolicy(policy) => { - self.blank_lines_allowed = policy; + self.line_spacing_policy = policy; } PreToken::Literal(value, kind) => { assert!(kind != SyntaxKind::Comment && kind != SyntaxKind::Whitespace); @@ -150,41 +154,49 @@ impl Postprocessor { } stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; + self.prior_is_comment = false; } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => { - if self.blank_lines_allowed == LineSpacingPolicy::Yes { - self.blank_line(stream); - } else { - todo!("handle line spacing policy") - } - } - Trivia::Comment(comment) => match comment { - Comment::Preceding(value) => { - if !matches!( - stream.0.last(), - Some(&PostToken::Newline) | Some(&PostToken::Indent) | None - ) { - self.interrupted = true; + match self.line_spacing_policy { + LineSpacingPolicy::Yes => { + self.blank_line(stream); + } + LineSpacingPolicy::BetweenComments => { + if self.prior_is_comment || matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { + self.blank_line(stream); + } } - self.end_line(stream); - stream.push(PostToken::Literal(value.to_owned())); - self.position = LinePosition::MiddleOfLine; - self.end_line(stream); } - Comment::Inline(value) => { - assert!(self.position == LinePosition::MiddleOfLine); - if let Some(next) = next { - if next != &PreToken::LineEnd { + } + Trivia::Comment(comment) => { + match comment { + Comment::Preceding(value) => { + if !matches!( + stream.0.last(), + Some(&PostToken::Newline) | Some(&PostToken::Indent) | None + ) { self.interrupted = true; } + self.end_line(stream); + stream.push(PostToken::Literal(value.to_owned())); + self.position = LinePosition::MiddleOfLine; + } + Comment::Inline(value) => { + assert!(self.position == LinePosition::MiddleOfLine); + if let Some(next) = next { + if next != &PreToken::LineEnd { + self.interrupted = true; + } + } + self.trim_last_line(stream); + stream.push(PostToken::Space); + stream.push(PostToken::Space); + stream.push(PostToken::Literal(value.to_owned())); } - self.trim_last_line(stream); - stream.push(PostToken::Space); - stream.push(PostToken::Space); - stream.push(PostToken::Literal(value.to_owned())); - self.end_line(stream); } + self.end_line(stream); + self.prior_is_comment = true; }, }, } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index 859f46682..a66c15f6f 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -54,6 +54,7 @@ pub fn format_ast(element: &FormatElement, stream: &mut TokenStream) { a_uri.as_str().cmp(b_uri.as_str()) }); + stream.blank_lines_allowed_between_comments(); for import in imports { (&import).write(stream); } diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index db5fb409d..c17cf6d2b 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -12,6 +12,8 @@ use crate::element::FormatElement; pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("task definition children"); + stream.blank_lines_allowed_between_comments(); + let task_keyword = children.next().expect("task keyword"); assert!(task_keyword.element().kind() == SyntaxKind::TaskKeyword); 
(&task_keyword).write(stream); @@ -95,13 +97,12 @@ pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream< stream.blank_line(); } - let need_blank = !body.is_empty(); + stream.blank_lines_allowed(); for child in body { (&child).write(stream); } - if need_blank { - stream.blank_line(); - } + stream.blank_lines_allowed_between_comments(); + stream.blank_line(); if let Some(command) = command { (&command).write(stream); diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs index 87034a509..db866210b 100644 --- a/wdl-format/src/v1/workflow.rs +++ b/wdl-format/src/v1/workflow.rs @@ -95,6 +95,8 @@ pub fn format_scatter_statement(element: &FormatElement, stream: &mut TokenStrea pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("workflow definition children"); + stream.blank_lines_allowed_between_comments(); + let workflow_keyword = children.next().expect("workflow keyword"); assert!(workflow_keyword.element().kind() == SyntaxKind::WorkflowKeyword); (&workflow_keyword).write(stream); @@ -174,9 +176,11 @@ pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStr stream.blank_line(); } + stream.blank_lines_allowed(); for child in body { (&child).write(stream); } + stream.blank_lines_allowed_between_comments(); stream.blank_line(); if let Some(output) = output { diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl index c2c43d2e7..5c1d9d19c 100644 --- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -7,21 +7,16 @@ struct RuntimeEnvironment { } workflow chip { - meta { version: "v2.2.2" - author: "Jin wook Lee" email: "leepc12@gmail.com" description: "ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil." organization: "ENCODE DCC" - specification_document: "https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing" - default_docker: "encodedcc/chip-seq-pipeline:v2.2.2" default_singularity: "https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif" croo_out_def: "https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json" - parameter_group: { runtime_environment: { title: "Runtime environment", @@ -323,7 +318,6 @@ workflow chip { group: "input_genomic_data", help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate's 2nd pseudos.", } - ctl_paired_end: { description: "Sequencing endedness for all controls.", group: "input_genomic_data_control", @@ -461,7 +455,6 @@ workflow chip { group: "input_genomic_data_control", help: "Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].", } - pipeline_type: { description: "Pipeline type. 
tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.", group: "pipeline_parameter", @@ -500,7 +493,6 @@ workflow chip { description: "Enables GC bias calculation.", group: "pipeline_parameter", } - aligner: { description: "Aligner. bowtie2, bwa or custom", group: "alignment", @@ -648,7 +640,6 @@ workflow chip { group: "peak_calling", help: "If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.", } - cap_num_peak: { description: "Upper limit on the number of peaks.", group: "peak_calling", @@ -668,7 +659,6 @@ workflow chip { description: "IDR threshold.", group: "peak_calling", } - align_cpu: { description: "Number of cores for task align.", group: "resource_parameter", @@ -919,7 +909,6 @@ workflow chip { File? peak_ppr1 File? peak_ppr2 File? peak_pooled - Boolean? ctl_paired_end Array[Boolean] ctl_paired_ends = [] Array[File] ctl_fastqs_rep1_R1 = [] @@ -996,44 +985,35 @@ workflow chip { Int align_time_hr = 48 Float align_bowtie2_disk_factor = 8.0 Float align_bwa_disk_factor = 8.0 - Int filter_cpu = 4 Float filter_mem_factor = 0.4 Int filter_time_hr = 24 Float filter_disk_factor = 8.0 - Int bam2ta_cpu = 2 Float bam2ta_mem_factor = 0.35 Int bam2ta_time_hr = 6 Float bam2ta_disk_factor = 4.0 - Float spr_mem_factor = 20.0 Float spr_disk_factor = 30.0 - Int jsd_cpu = 4 Float jsd_mem_factor = 0.1 Int jsd_time_hr = 6 Float jsd_disk_factor = 2.0 - Int xcor_cpu = 2 Float xcor_mem_factor = 1.0 Int xcor_time_hr = 24 Float xcor_disk_factor = 4.5 - Float subsample_ctl_mem_factor = 22.0 Float subsample_ctl_disk_factor = 15.0 - Float macs2_signal_track_mem_factor = 12.0 Int macs2_signal_track_time_hr = 24 Float macs2_signal_track_disk_factor = 80.0 - Int call_peak_cpu = 6 Float call_peak_spp_mem_factor = 5.0 Float call_peak_macs2_mem_factor = 5.0 Int call_peak_time_hr = 72 Float call_peak_spp_disk_factor = 5.0 Float call_peak_macs2_disk_factor = 30.0 - String? align_trimmomatic_java_heap String? filter_picard_java_heap String? gc_bias_picard_java_heap @@ -2529,9 +2509,7 @@ task align { Int crop_length Int crop_length_tol String? trimmomatic_phred_score_format - String aligner - String mito_chr_name Int? multimapping File? custom_align_py @@ -2540,13 +2518,11 @@ task align { Boolean use_bwa_mem_for_pe Int bwa_mem_read_len_limit Boolean use_bowtie2_local_mode - String? trimmomatic_java_heap Int cpu Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -2703,7 +2679,6 @@ task align { time: time_hr disks: "local-disk ~{disk_gb} SSD" preemptible: 0 - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2723,13 +2698,11 @@ task filter { File chrsz # 2-col chromosome sizes file Boolean no_dup_removal # no dupe reads removal when filtering BAM String mito_chr_name - Int cpu Float mem_factor String? 
picard_java_heap Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -2785,7 +2758,6 @@ task filter { memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2803,7 +2775,6 @@ task bam2ta { Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -2836,7 +2807,6 @@ task bam2ta { memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2848,10 +2818,8 @@ task spr { File? ta Boolean paired_end Int pseudoreplication_random_seed - Float mem_factor Float disk_factor - RuntimeEnvironment runtime_environment } @@ -2880,7 +2848,6 @@ task spr { memory: "~{mem_gb} GB" time: 4 disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2913,7 +2880,6 @@ task pool_ta { memory: "8 GB" time: 4 disks: "local-disk 100 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2931,12 +2897,10 @@ task xcor { String? chip_seq_type Int? exclusion_range_min Int? exclusion_range_max - Int cpu Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -2974,7 +2938,6 @@ task xcor { memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -2987,12 +2950,10 @@ task jsd { Array[File?] ctl_bams File? blacklist Int mapq_thresh - Int cpu Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -3023,7 +2984,6 @@ task jsd { memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3041,7 +3001,6 @@ task choose_ctl { # then always use pooled control for all exp rep. Int ctl_depth_limit Float exp_ctl_depth_ratio_limit - RuntimeEnvironment runtime_environment } @@ -3075,7 +3034,6 @@ task choose_ctl { memory: "4 GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3110,7 +3068,6 @@ task count_signal_track { memory: "~{mem_gb} GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3122,10 +3079,8 @@ task subsample_ctl { File? ta Boolean paired_end Int subsample - Float mem_factor Float disk_factor - RuntimeEnvironment runtime_environment } @@ -3152,7 +3107,6 @@ task subsample_ctl { memory: "~{mem_gb} GB" time: 4 disks: "local-disk ~{disk_gb} SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3174,12 +3128,10 @@ task call_peak { File? blacklist # blacklist BED to filter raw peaks String? 
regex_bfilt_peak_chr_name - Int cpu Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -3242,7 +3194,6 @@ task call_peak { time: time_hr disks: "local-disk ~{disk_gb} SSD" preemptible: 0 - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3261,7 +3212,6 @@ task macs2_signal_track { Float mem_factor Int time_hr Float disk_factor - RuntimeEnvironment runtime_environment } @@ -3291,7 +3241,6 @@ task macs2_signal_track { time: time_hr disks: "local-disk ~{disk_gb} SSD" preemptible: 0 - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3313,7 +3262,6 @@ task idr { File chrsz # 2-col chromosome sizes file String peak_type String rank - RuntimeEnvironment runtime_environment } @@ -3357,7 +3305,6 @@ task idr { memory: "4 GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3377,7 +3324,6 @@ task overlap { Int? fraglen # fragment length from xcor (for FRIP) File chrsz # 2-col chromosome sizes file String peak_type - RuntimeEnvironment runtime_environment } @@ -3417,7 +3363,6 @@ task overlap { memory: "4 GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3473,7 +3418,6 @@ task reproducibility { memory: "4 GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3484,9 +3428,7 @@ task gc_bias { input { File? nodup_bam File ref_fa - String? picard_java_heap - RuntimeEnvironment runtime_environment } @@ -3516,7 +3458,6 @@ task gc_bias { memory: "~{mem_gb} GB" time: 6 disks: "local-disk 250 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3573,23 +3514,17 @@ task qc_report { File? frip_overlap_qc_ppr File? idr_reproducibility_qc File? overlap_reproducibility_qc - Array[File] gc_plots - Array[File] peak_region_size_qcs Array[File] peak_region_size_plots Array[File] num_peak_qcs - File? idr_opt_peak_region_size_qc File? idr_opt_peak_region_size_plot File? idr_opt_num_peak_qc - File? overlap_opt_peak_region_size_qc File? overlap_opt_peak_region_size_plot File? overlap_opt_num_peak_qc - File? qc_json_ref - RuntimeEnvironment runtime_environment } @@ -3671,7 +3606,6 @@ task qc_report { memory: "4 GB" time: 4 disks: "local-disk 50 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3683,7 +3617,6 @@ task read_genome_tsv { input { File? genome_tsv String? 
null_s - RuntimeEnvironment runtime_environment } @@ -3743,7 +3676,6 @@ task read_genome_tsv { memory: "2 GB" time: 4 disks: "local-disk 10 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3753,7 +3685,6 @@ task read_genome_tsv { task rounded_mean { input { Array[Int] ints - RuntimeEnvironment runtime_environment } @@ -3779,7 +3710,6 @@ task rounded_mean { memory: "2 GB" time: 4 disks: "local-disk 10 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda @@ -3789,7 +3719,6 @@ task rounded_mean { task raise_exception { input { String msg - RuntimeEnvironment runtime_environment } @@ -3808,7 +3737,6 @@ task raise_exception { memory: "2 GB" time: 4 disks: "local-disk 10 SSD" - docker: runtime_environment.docker singularity: runtime_environment.singularity conda: runtime_environment.conda diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl index dc7e44561..19d6e391c 100644 --- a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -97,7 +97,6 @@ workflow test_wf { "c": 2, } } - } task test_task { diff --git a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl index b5d7fa424..c3a8d1772 100644 --- a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl +++ b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl @@ -1,7 +1,6 @@ version 1.1 import "fileA.wdl" as bar alias cows as horses alias cats as dogs - import "fileB.wdl" as foo import "fileC.wdl" alias qux as Qux diff --git a/wdl-format/tests/format/interrupt_example/source.formatted.wdl b/wdl-format/tests/format/interrupt_example/source.formatted.wdl index 492123837..4797ab7c7 100644 --- a/wdl-format/tests/format/interrupt_example/source.formatted.wdl +++ b/wdl-format/tests/format/interrupt_example/source.formatted.wdl @@ -9,4 +9,5 @@ workflow meta # interrupt { # how far should this bracket be indented? } + } diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl index df595fc04..15f75929a 100644 --- a/wdl-format/tests/format/seaseq-case/source.formatted.wdl +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -17,7 +17,6 @@ import "workflows/workflows/motifs.wdl" import "workflows/workflows/visualization.wdl" as viz workflow seaseq { - meta { title: "SEAseq Analysis" summary: "Single-End Antibody Sequencing (SEAseq) Pipeline" @@ -155,7 +154,6 @@ workflow seaseq { # group: analysis_parameter String? results_name Boolean run_motifs = true - } String pipeline_ver = "v2.0.0" @@ -782,10 +780,8 @@ workflow seaseq { Array[File?]? indv_s_zipfile = indv_fastqc.zipfile Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile - File? s_mergebam_htmlfile = mergebamfqc.htmlfile File? s_mergebam_zipfile = mergebamfqc.zipfile - File? uno_s_htmlfile = uno_fastqc.htmlfile File? uno_s_zipfile = uno_fastqc.zipfile File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile @@ -802,14 +798,12 @@ workflow seaseq { Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam Array[File?]? 
indv_s_rmindexbam = indv_mapping.mkdup_index - File? uno_s_sortedbam = mapping.sorted_bam File? uno_s_indexstatsbam = mapping.bam_index File? uno_s_bkbam = mapping.bklist_bam File? uno_s_bkindexbam = mapping.bklist_index File? uno_s_rmbam = mapping.mkdup_bam File? uno_s_rmindexbam = mapping.mkdup_index - File? s_mergebamfile = mergebam.mergebam File? s_mergebamindex = mergeindexstats.indexbam File? s_bkbam = merge_rmblklist.intersect_out @@ -858,13 +852,11 @@ workflow seaseq { #MOTIFS File? flankbedfile = flankbed.flankbedfile - File? ame_tsv = motifs.ame_tsv File? ame_html = motifs.ame_html File? ame_seq = motifs.ame_seq File? meme = motifs.meme_out File? meme_summary = motifs.meme_summary - File? summit_ame_tsv = flank.ame_tsv File? summit_ame_html = flank.ame_html File? summit_ame_seq = flank.ame_seq @@ -891,7 +883,6 @@ workflow seaseq { File? peak_comparison = peaksanno.peak_comparison File? gene_comparison = peaksanno.gene_comparison File? pdf_comparison = peaksanno.pdf_comparison - File? all_peak_promoters = all_peaksanno.peak_promoters File? all_peak_genebody = all_peaksanno.peak_genebody File? all_peak_window = all_peaksanno.peak_window @@ -899,7 +890,6 @@ workflow seaseq { File? all_peak_comparison = all_peaksanno.peak_comparison File? all_gene_comparison = all_peaksanno.gene_comparison File? all_pdf_comparison = all_peaksanno.pdf_comparison - File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody File? nomodel_peak_window = nomodel_peaksanno.peak_window @@ -907,7 +897,6 @@ workflow seaseq { File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison - File? sicer_peak_promoters = sicer_peaksanno.peak_promoters File? sicer_peak_genebody = sicer_peaksanno.peak_genebody File? sicer_peak_window = sicer_peaksanno.peak_window @@ -926,7 +915,6 @@ workflow seaseq { File? a_bigwig = vizall.bigwig File? a_norm_wig = vizall.norm_wig File? a_tdffile = vizall.tdffile - File? s_bigwig = vizsicer.bigwig File? s_norm_wig = vizsicer.norm_wig File? s_tdffile = vizsicer.tdffile @@ -936,15 +924,12 @@ workflow seaseq { Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile Array[File?]? s_qc_textfile = indv_summarystats.textfile File? s_qc_mergehtml = mergehtml.mergefile - File? s_uno_statsfile = uno_summarystats.statsfile File? s_uno_htmlfile = uno_summarystats.htmlfile File? s_uno_textfile = uno_summarystats.textfile - File? statsfile = merge_summarystats.statsfile File? htmlfile = merge_summarystats.htmlfile File? textfile = merge_summarystats.textfile - File? summaryhtml = select_first([ uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml, From d2d699f393dc6463af532a2b5accb3907ec2db4e Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:30:29 -0400 Subject: [PATCH 24/60] revise: simplify blank line logic --- wdl-format/src/token/post.rs | 8 +------- wdl-format/src/v1/expr.rs | 5 +---- .../ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl | 5 ----- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index ae22117eb..7dc96a2d7 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -79,9 +79,6 @@ pub struct Postprocessor { /// Whether blank lines are allowed in the current context. line_spacing_policy: LineSpacingPolicy, - - /// Whether the last token was a comment. 
- prior_is_comment: bool, } impl Postprocessor { @@ -111,7 +108,6 @@ impl Postprocessor { match token { PreToken::BlankLine => { self.blank_line(stream); - self.prior_is_comment = false; } PreToken::LineEnd => { self.interrupted = false; @@ -154,7 +150,6 @@ impl Postprocessor { } stream.push(PostToken::Literal(value.to_owned())); self.position = LinePosition::MiddleOfLine; - self.prior_is_comment = false; } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => { @@ -163,7 +158,7 @@ impl Postprocessor { self.blank_line(stream); } LineSpacingPolicy::BetweenComments => { - if self.prior_is_comment || matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { + if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { self.blank_line(stream); } } @@ -196,7 +191,6 @@ impl Postprocessor { } } self.end_line(stream); - self.prior_is_comment = true; }, }, } diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index 4c6005012..cb3c0efc0 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -668,10 +668,7 @@ pub fn format_parenthesized_expr(element: &FormatElement, stream: &mut TokenStre pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("if expr children"); - let nested_else_if = match stream.last_literal_kind() { - Some(SyntaxKind::ElseKeyword) => true, - _ => false, - }; + let nested_else_if = matches!(stream.last_literal_kind(), Some(SyntaxKind::ElseKeyword)); let if_keyword = children.next().expect("if keyword"); assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl index 5c1d9d19c..ad4328a16 100644 --- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -2859,7 +2859,6 @@ task pool_ta { Array[File?] tas Int? col # number of columns in pooled TA String? prefix # basename prefix - RuntimeEnvironment runtime_environment } @@ -3044,7 +3043,6 @@ task count_signal_track { input { File? ta # tag-align File chrsz # 2-col chromosome sizes file - RuntimeEnvironment runtime_environment } @@ -3125,7 +3123,6 @@ task call_peak { Int cap_num_peak # cap number of raw peaks called from MACS2 Float pval_thresh # p.value threshold for MACS2 Float? fdr_thresh # FDR threshold for SPP - File? blacklist # blacklist BED to filter raw peaks String? regex_bfilt_peak_chr_name Int cpu @@ -3208,7 +3205,6 @@ task macs2_signal_track { # chr. sizes file, or hs for human, ms for mouse) File chrsz # 2-col chromosome sizes file Float pval_thresh # p.value threshold - Float mem_factor Int time_hr Float disk_factor @@ -3380,7 +3376,6 @@ task reproducibility { File? peak_ppr # Peak file from pooled pseudo replicate. 
String peak_type File chrsz # 2-col chromosome sizes file - RuntimeEnvironment runtime_environment } From 502fca0d85e6f1f957e0ff3577550ca7d8ad3d3e Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:39:10 -0400 Subject: [PATCH 25/60] chore: cleanup after merge --- wdl-format/src/token/post.rs | 18 ++++++++---------- wdl/src/bin/wdl.rs | 6 ++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index 7dc96a2d7..00c04bb4e 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -152,18 +152,16 @@ impl Postprocessor { self.position = LinePosition::MiddleOfLine; } PreToken::Trivia(trivia) => match trivia { - Trivia::BlankLine => { - match self.line_spacing_policy { - LineSpacingPolicy::Yes => { + Trivia::BlankLine => match self.line_spacing_policy { + LineSpacingPolicy::Yes => { + self.blank_line(stream); + } + LineSpacingPolicy::BetweenComments => { + if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { self.blank_line(stream); } - LineSpacingPolicy::BetweenComments => { - if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { - self.blank_line(stream); - } - } } - } + }, Trivia::Comment(comment) => { match comment { Comment::Preceding(value) => { @@ -191,7 +189,7 @@ impl Postprocessor { } } self.end_line(stream); - }, + } }, } } diff --git a/wdl/src/bin/wdl.rs b/wdl/src/bin/wdl.rs index 71e5e7489..67ab754f5 100644 --- a/wdl/src/bin/wdl.rs +++ b/wdl/src/bin/wdl.rs @@ -35,11 +35,11 @@ use wdl::ast::Validator; use wdl::lint::LintVisitor; use wdl_analysis::AnalysisResult; use wdl_analysis::Analyzer; +use wdl_analysis::Rule; +use wdl_analysis::rules; use wdl_ast::Node; use wdl_format::Formatter; use wdl_format::element::node::AstNodeFormatExt as _; -use wdl_analysis::Rule; -use wdl_analysis::rules; /// Emits the given diagnostics to the output stream. /// @@ -242,6 +242,7 @@ pub struct CheckCommand { #[clap(value_name = "PATH")] pub path: PathBuf, + /// The analysis options. #[clap(flatten)] pub options: AnalysisOptions, } @@ -303,6 +304,7 @@ pub struct AnalyzeCommand { #[clap(value_name = "PATH")] pub path: PathBuf, + /// The analysis options. #[clap(flatten)] pub options: AnalysisOptions, From 5696b290b8f7236b668b1cd9e20f174f585fe0b9 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:44:35 -0400 Subject: [PATCH 26/60] revise(LineSpacingPolicy): BetweenComments->BeforeComments --- wdl-format/src/token.rs | 4 ++-- wdl-format/src/token/post.rs | 2 +- wdl-format/src/token/pre.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs index 68b7ebf00..a5ec1b649 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -91,8 +91,8 @@ pub enum Trivia { /// Whether optional blank lines are allowed in the current context. #[derive(Eq, PartialEq, Default, Debug, Clone, Copy)] pub enum LineSpacingPolicy { - /// Blank lines are allowed between comments. - BetweenComments, + /// Blank lines are allowed before comments. + BeforeComments, /// Blank lines are always allowed. 
#[default] Yes, diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index 00c04bb4e..fac2d6aeb 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -156,7 +156,7 @@ impl Postprocessor { LineSpacingPolicy::Yes => { self.blank_line(stream); } - LineSpacingPolicy::BetweenComments => { + LineSpacingPolicy::BeforeComments => { if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { self.blank_line(stream); } diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index 05f35ad57..cf04c49ef 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -144,7 +144,7 @@ impl TokenStream { /// Inserts a blank lines allowed between comments context change. pub fn blank_lines_allowed_between_comments(&mut self) { self.0.push(PreToken::LineSpacingPolicy( - LineSpacingPolicy::BetweenComments, + LineSpacingPolicy::BeforeComments, )); } From c5581ea6bc1d45521c3e76b157cb2816f6e06e20 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:46:45 -0400 Subject: [PATCH 27/60] Update Gauntlet.toml --- Gauntlet.toml | 164 +++++++++----------------------------------------- 1 file changed, 27 insertions(+), 137 deletions(-) diff --git a/Gauntlet.toml b/Gauntlet.toml index d515a62d6..99f583124 100644 --- a/Gauntlet.toml +++ b/Gauntlet.toml @@ -233,7 +233,7 @@ permalink = "https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/blob/1cf2b [[diagnostics]] document = "PacificBiosciences/HiFi-human-WGS-WDL:/workflows/tertiary_analysis/tertiary_analysis.wdl" -message = "tertiary_analysis.wdl:46:38: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[IndexData]]+`" +message = "tertiary_analysis.wdl:46:38: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[IndexData]]+`" permalink = "https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/blob/1cf2b2e80024290d0ec1ea93b6a279ea2de519b0/workflows/tertiary_analysis/tertiary_analysis.wdl/#L46" [[diagnostics]] @@ -893,7 +893,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026 [[diagnostics]] document = "biowdl/tasks:/sambamba.wdl" -message = "sambamba.wdl:157:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "sambamba.wdl:157:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/sambamba.wdl/#L157" [[diagnostics]] @@ -928,7 +928,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026 [[diagnostics]] document = "biowdl/tasks:/samtools.wdl" -message = "samtools.wdl:470:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "samtools.wdl:470:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/samtools.wdl/#L470" [[diagnostics]] @@ 
-968,7 +968,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026 [[diagnostics]] document = "biowdl/tasks:/umi.wdl" -message = "umi.wdl:39:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "umi.wdl:39:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/umi.wdl/#L39" [[diagnostics]] @@ -1406,31 +1406,16 @@ document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:457:40: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L457" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:457:49: error: cannot coerce type `Array[Int]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L457" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:485:43: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L485" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:485:52: error: cannot coerce type `Array[Int]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L485" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:772:43: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L772" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:772:52: error: cannot coerce type `Array[Int]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L772" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:77:28: error: type mismatch: expected type `String`, but found type `String?`" @@ -1441,31 +1426,16 @@ document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:789:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L789" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:789:49: error: cannot coerce type `Array[String]` to `String`" -permalink = 
"https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L789" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:790:42: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L790" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:790:51: error: cannot coerce type `Array[Int]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L790" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:890:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L890" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" -message = "BenchmarkSVs.wdl:890:49: error: cannot coerce type `Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L890" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/CleanSVs.wdl" message = "CleanSVs.wdl:23:29: error: type mismatch: expected type `String`, but found type `String?`" @@ -1541,31 +1511,16 @@ document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:613:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L613" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:613:49: error: cannot coerce type `Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L613" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:621:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L621" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:621:49: error: cannot coerce type `Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L621" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:626:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L626" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:626:49: error: cannot coerce type 
`Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L626" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:631:36: error: a placeholder cannot have more than one option" @@ -1573,12 +1528,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:631:49: error: cannot coerce type `Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L631" - -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:91:54: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]`" +message = "BenchmarkVCFs.wdl:91:54: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L91" [[diagnostics]] @@ -1733,7 +1683,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/FunctionalEquivalence/subworkflows/FEEvaluation.wdl" -message = "FEEvaluation.wdl:124:35: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "FEEvaluation.wdl:124:35: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/FunctionalEquivalence/subworkflows/FEEvaluation.wdl/#L124" [[diagnostics]] @@ -1793,7 +1743,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl" -message = "Glimpse2ImputationAndCheckQC.wdl:70:39: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "Glimpse2ImputationAndCheckQC.wdl:70:39: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl/#L70" [[diagnostics]] @@ -1818,7 +1768,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl" -message = "Glimpse2ImputationAndCheckQC.wdl:83:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type 
`Array[File]+`" +message = "Glimpse2ImputationAndCheckQC.wdl:83:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl/#L83" [[diagnostics]] @@ -2033,7 +1983,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/ImputationPipeline/Validation/SubsetWeightSet.wdl" -message = "SubsetWeightSet.wdl:28:66: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[SelfExclusiveSites]+`" +message = "SubsetWeightSet.wdl:28:66: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[SelfExclusiveSites]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/ImputationPipeline/Validation/SubsetWeightSet.wdl/#L28" [[diagnostics]] @@ -2073,7 +2023,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/LongReadRNABenchmark/IsoformDiscoveryBenchmarkTasks.wdl" -message = "IsoformDiscoveryBenchmarkTasks.wdl:69:32: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "IsoformDiscoveryBenchmarkTasks.wdl:69:32: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/LongReadRNABenchmark/IsoformDiscoveryBenchmarkTasks.wdl/#L69" [[diagnostics]] @@ -2086,21 +2036,11 @@ document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIn message = "ComputeIntervalBamStats.wdl:223:37: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L223" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" -message = "ComputeIntervalBamStats.wdl:223:50: error: cannot coerce type `Array[File]` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L223" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" message = "ComputeIntervalBamStats.wdl:270:37: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L270" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" -message = "ComputeIntervalBamStats.wdl:270:50: error: cannot coerce type `Array[File]` to `String`" -permalink = 
"https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L270" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" message = "ComputeIntervalBamStats.wdl:77:14: warning[UnusedInput]: unused input `ref_fasta`" @@ -2111,31 +2051,16 @@ document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.w message = "CreateIGVSession.wdl:51:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L51" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" -message = "CreateIGVSession.wdl:51:50: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L51" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" message = "CreateIGVSession.wdl:52:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L52" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" -message = "CreateIGVSession.wdl:52:50: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L52" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" message = "CreateIGVSession.wdl:53:46: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L53" -[[diagnostics]] -document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" -message = "CreateIGVSession.wdl:53:60: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L53" - [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/DownsampleAndCollectCoverage.wdl" message = "DownsampleAndCollectCoverage.wdl:173:91: error: type mismatch: expected type `Float?`, but found type `String?`" @@ -2153,7 +2078,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/MatchFingerprints.wdl" -message = "MatchFingerprints.wdl:67:33: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[String]]`" +message = "MatchFingerprints.wdl:67:33: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[String]]`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/MatchFingerprints.wdl/#L67" [[diagnostics]] @@ -2518,37 +2443,37 @@ 
permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e2955 [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:91:34: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:91:34: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L91" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:92:48: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:92:48: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L92" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:93:46: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:93:46: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L93" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:94:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:94:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L94" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:95:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any 
optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:95:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L95" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:96:38: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:96:38: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L96" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:97:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:97:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L97" [[diagnostics]] @@ -2911,31 +2836,16 @@ document = "broadinstitute/warp:/tasks/broad/CopyFilesFromCloudToCloud.wdl" message = "CopyFilesFromCloudToCloud.wdl:71:10: error: type mismatch: expected type `Int` or type `Float`, but found type `String`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/CopyFilesFromCloudToCloud.wdl/#L71" -[[diagnostics]] -document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" -message = "Funcotator.wdl:163:118: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L163" - [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:163:92: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L163" -[[diagnostics]] -document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" -message = "Funcotator.wdl:164:118: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L164" - [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:164:89: error: a placeholder cannot have more than one option" permalink 
= "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L164" -[[diagnostics]] -document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" -message = "Funcotator.wdl:165:121: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L165" - [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:165:91: error: a placeholder cannot have more than one option" @@ -3131,11 +3041,6 @@ document = "broadinstitute/warp:/tasks/broad/Qc.wdl" message = "Qc.wdl:434:31: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Qc.wdl/#L434" -[[diagnostics]] -document = "broadinstitute/warp:/tasks/broad/Qc.wdl" -message = "Qc.wdl:434:46: error: cannot coerce type `Array[String]?` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Qc.wdl/#L434" - [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Qc.wdl" message = "Qc.wdl:436:46: error: a placeholder cannot have more than one option" @@ -3511,11 +3416,6 @@ document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" message = "VerifyMetrics.wdl:73:11: warning[UnusedInput]: unused input `dependency_input`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyMetrics.wdl/#L73" -[[diagnostics]] -document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" -message = "VerifyMetrics.wdl:87:117: error: cannot coerce type `Array[String]` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyMetrics.wdl/#L87" - [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" message = "VerifyMetrics.wdl:87:89: error: a placeholder cannot have more than one option" @@ -3796,11 +3696,6 @@ document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsJointGenotypin message = "VerifyUltimaGenomicsJointGenotyping.wdl:48:8: warning[UnusedCall]: unused call `CompareFingerprints`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsJointGenotyping.wdl/#L48" -[[diagnostics]] -document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" -message = "VerifyUltimaGenomicsWholeGenomeCramOnly.wdl:102:115: error: cannot coerce type `Array[String]` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl/#L102" - [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" message = "VerifyUltimaGenomicsWholeGenomeCramOnly.wdl:102:87: error: a placeholder cannot have more than one option" @@ -3841,11 +3736,6 @@ document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGer message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:161:11: warning[UnusedInput]: unused input `dependency_input`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl/#L161" -[[diagnostics]] -document = 
"broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" -message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:174:115: error: cannot coerce type `Array[String]` to `String`" -permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl/#L174" - [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:174:87: error: a placeholder cannot have more than one option" @@ -4248,7 +4138,7 @@ permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/a04293a527117 [[diagnostics]] document = "chanzuckerberg/czid-workflows:/workflows/bulk-download/run.wdl" -message = "run.wdl:91:31: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]`" +message = "run.wdl:91:31: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]`" permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/a04293a5271176885ce7f876b6353b20da3f7b98/workflows/bulk-download/run.wdl/#L91" [[diagnostics]] @@ -4528,7 +4418,7 @@ permalink = "https://github.com/stjudecloud/workflows/blob/a56ad9b8c7de5c9b13350 [[diagnostics]] document = "stjudecloud/workflows:/workflows/chipseq/chipseq-standard.wdl" -message = "chipseq-standard.wdl:110:45: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[File]]+`" +message = "chipseq-standard.wdl:110:45: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[File]]+`" permalink = "https://github.com/stjudecloud/workflows/blob/a56ad9b8c7de5c9b13350f465c1543d0cb0b6b1a/workflows/chipseq/chipseq-standard.wdl/#L110" [[diagnostics]] @@ -4798,27 +4688,27 @@ permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be240 [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:33:58: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "wf_nextclade_addToRefTree.wdl:33:58: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L33" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:34:53: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:34:53: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = 
"https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L34" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:35:51: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:35:51: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L35" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:36:52: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:36:52: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L36" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:37:57: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "wf_nextclade_addToRefTree.wdl:37:57: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L37" [[diagnostics]] @@ -5063,12 +4953,12 @@ permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be240 [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl" -message = "wf_theiaeuk_illumina_pe.wdl:118:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "wf_theiaeuk_illumina_pe.wdl:118:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl/#L118" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl" -message = "wf_theiaeuk_illumina_pe.wdl:127:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "wf_theiaeuk_illumina_pe.wdl:127:40: error: type mismatch: 
argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl/#L127" [[diagnostics]] From 7e1bfa4b8e6f7f81701a94b08ad9805ca32886aa Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 13:50:40 -0400 Subject: [PATCH 28/60] chore: rm backup/ --- backup/wdl-format-old-2/CHANGELOG.md | 12 - backup/wdl-format-old-2/Cargo.toml | 28 - backup/wdl-format-old-2/src/element.rs | 0 backup/wdl-format-old-2/src/formatter.rs | 131 - backup/wdl-format-old-2/src/import.rs | 174 - backup/wdl-format-old-2/src/lib.rs | 190 - backup/wdl-format-old-2/src/metadata.rs | 365 -- backup/wdl-format-old-2/src/node.rs | 99 - backup/wdl-format-old-2/src/registry.rs | 373 -- backup/wdl-format-old-2/src/task.rs | 455 --- backup/wdl-format-old-2/src/token.rs | 82 - backup/wdl-format-old-2/src/tokens.rs | 2089 ----------- backup/wdl-format-old-2/src/v1.rs | 711 ---- backup/wdl-format-old-2/src/workflow.rs | 666 ---- backup/wdl-format-old-2/tests/format.rs | 192 - .../ENCODE-DCC_chip-seq-pipeline/LICENSE.txt | 25 - .../source.formatted.wdl | 1 - .../ENCODE-DCC_chip-seq-pipeline/source.wdl | 3296 ----------------- .../clays_complex_script/source.formatted.wdl | 7 - .../format/clays_complex_script/source.wdl | 165 - .../source.formatted.wdl | 1 - .../format/complex_meta_and_calls/source.wdl | 106 - .../source.formatted.wdl | 23 - .../imports_with_both_comments/source.wdl | 23 - .../source.formatted.wdl | 12 - .../imports_with_inline_comments/source.wdl | 12 - .../source.formatted.wdl | 5 - .../imports_with_no_comments/source.wdl | 7 - .../source.formatted.wdl | 23 - .../source.wdl | 23 - .../interrupt_example/source.formatted.wdl | 2 - .../tests/format/interrupt_example/source.wdl | 10 - .../tests/format/seaseq-case/LICENSE.txt | 205 - .../format/seaseq-case/source.formatted.wdl | 17 - .../tests/format/seaseq-case/source.wdl | 898 ----- backup/wdl-format-old/CHANGELOG.md | 12 - backup/wdl-format-old/Cargo.toml | 28 - backup/wdl-format-old/src/formatter.rs | 131 - backup/wdl-format-old/src/import.rs | 174 - backup/wdl-format-old/src/lib.rs | 190 - backup/wdl-format-old/src/metadata.rs | 365 -- backup/wdl-format-old/src/task.rs | 455 --- backup/wdl-format-old/src/v1.rs | 711 ---- backup/wdl-format-old/src/workflow.rs | 666 ---- backup/wdl-format-old/tests/format.rs | 192 - .../ENCODE-DCC_chip-seq-pipeline/LICENSE.txt | 25 - .../source.formatted.wdl | 1 - .../ENCODE-DCC_chip-seq-pipeline/source.wdl | 3296 ----------------- .../clays_complex_script/source.formatted.wdl | 7 - .../format/clays_complex_script/source.wdl | 165 - .../source.formatted.wdl | 1 - .../format/complex_meta_and_calls/source.wdl | 106 - .../source.formatted.wdl | 23 - .../imports_with_both_comments/source.wdl | 23 - .../source.formatted.wdl | 12 - .../imports_with_inline_comments/source.wdl | 12 - .../source.formatted.wdl | 5 - .../imports_with_no_comments/source.wdl | 7 - .../source.formatted.wdl | 23 - .../source.wdl | 23 - .../interrupt_example/source.formatted.wdl | 2 - .../tests/format/interrupt_example/source.wdl | 10 - .../tests/format/seaseq-case/LICENSE.txt | 205 - .../format/seaseq-case/source.formatted.wdl | 17 - .../tests/format/seaseq-case/source.wdl | 898 ----- 65 files changed, 18213 deletions(-) delete mode 100644 backup/wdl-format-old-2/CHANGELOG.md delete mode 100644 
backup/wdl-format-old-2/Cargo.toml delete mode 100644 backup/wdl-format-old-2/src/element.rs delete mode 100644 backup/wdl-format-old-2/src/formatter.rs delete mode 100644 backup/wdl-format-old-2/src/import.rs delete mode 100644 backup/wdl-format-old-2/src/lib.rs delete mode 100644 backup/wdl-format-old-2/src/metadata.rs delete mode 100644 backup/wdl-format-old-2/src/node.rs delete mode 100644 backup/wdl-format-old-2/src/registry.rs delete mode 100644 backup/wdl-format-old-2/src/task.rs delete mode 100644 backup/wdl-format-old-2/src/token.rs delete mode 100644 backup/wdl-format-old-2/src/tokens.rs delete mode 100644 backup/wdl-format-old-2/src/v1.rs delete mode 100644 backup/wdl-format-old-2/src/workflow.rs delete mode 100644 backup/wdl-format-old-2/tests/format.rs delete mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt delete mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt delete mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl delete mode 100644 backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl delete mode 100644 backup/wdl-format-old/CHANGELOG.md delete mode 100644 backup/wdl-format-old/Cargo.toml delete mode 100644 backup/wdl-format-old/src/formatter.rs delete mode 100644 backup/wdl-format-old/src/import.rs delete mode 100644 backup/wdl-format-old/src/lib.rs delete mode 100644 backup/wdl-format-old/src/metadata.rs delete mode 100644 backup/wdl-format-old/src/task.rs delete mode 100644 backup/wdl-format-old/src/v1.rs delete mode 100644 backup/wdl-format-old/src/workflow.rs delete mode 100644 backup/wdl-format-old/tests/format.rs delete mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt delete mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl delete mode 100644 
backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/clays_complex_script/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/interrupt_example/source.wdl delete mode 100644 backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt delete mode 100644 backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl delete mode 100644 backup/wdl-format-old/tests/format/seaseq-case/source.wdl diff --git a/backup/wdl-format-old-2/CHANGELOG.md b/backup/wdl-format-old-2/CHANGELOG.md deleted file mode 100644 index 3eeeadd81..000000000 --- a/backup/wdl-format-old-2/CHANGELOG.md +++ /dev/null @@ -1,12 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## Unreleased - -### Added - -* Added the `wdl-format` crate for formatting WDL documents ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)). diff --git a/backup/wdl-format-old-2/Cargo.toml b/backup/wdl-format-old-2/Cargo.toml deleted file mode 100644 index eec06496c..000000000 --- a/backup/wdl-format-old-2/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -name = "wdl-format" -version = "0.1.0" -license.workspace = true -edition.workspace = true -authors.workspace = true -homepage.workspace = true -repository.workspace = true - -[dependencies] -anyhow.workspace = true -wdl-ast = { path = "../wdl-ast", version = "0.5.0" } -wdl-grammar = { version = "0.6.0", path = "../wdl-grammar" } - -[dev-dependencies] -pretty_assertions = { workspace = true } -approx = { workspace = true } -rayon = { workspace = true } -colored = { workspace = true } -codespan-reporting = { workspace = true } - -[features] -codespan = ["wdl-ast/codespan"] - -[[test]] -name = "format" -required-features = ["codespan"] -harness = false diff --git a/backup/wdl-format-old-2/src/element.rs b/backup/wdl-format-old-2/src/element.rs deleted file mode 100644 index e69de29bb..000000000 diff --git a/backup/wdl-format-old-2/src/formatter.rs b/backup/wdl-format-old-2/src/formatter.rs deleted file mode 100644 index 6ef467b44..000000000 --- a/backup/wdl-format-old-2/src/formatter.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! Contains the `Formatter` struct, which is used to keep track of the -//! current formatting state. 
This includes the current indentation level and
-//! whether the current line has been interrupted by comments.
-//! The state becomes "interrupted" by comments when a comment forces a newline
-//! where it would otherwise not be expected. In this case, the next line(s)
-//! will be indented by one level.
-
-use crate::Formattable;
-use crate::NEWLINE;
-
-/// Space constant used for formatting.
-pub const SPACE: &str = " ";
-/// Indentation constant used for formatting. Indentation is four spaces
-/// per-level.
-pub const INDENT: &str = "    ";
-/// Inline comment space constant used for formatting.
-///
-/// Inline comments should start two spaces after the end of the element they
-/// are commenting on.
-pub const INLINE_COMMENT_SPACE: &str = "  ";
-
-/// The `Formatter` struct is used to keep track of the current formatting
-/// state. This includes the current indentation level and whether the current
-/// line has been interrupted by comments.
-#[derive(Debug, Clone, Copy, Default)]
-pub struct Formatter {
-    /// The current indentation level.
-    indent_level: usize,
-    /// Whether the current line has been interrupted by comments.
-    interrupted_by_comments: bool,
-}
-
-impl Formatter {
-    /// Format an element.
-    pub fn format<T: Formattable, F: std::fmt::Write>(
-        mut self,
-        element: &T,
-        writer: &mut F,
-    ) -> std::fmt::Result {
-        element.format(writer, &mut self)
-    }
-
-    /// Add the current indentation to the writer.
-    /// The indentation level will be temporarily increased by one if the
-    /// current line has been interrupted by comments.
-    pub fn indent<T: std::fmt::Write>(&self, writer: &mut T) -> std::fmt::Result {
-        write!(
-            writer,
-            "{}",
-            INDENT.repeat(self.indent_level + (if self.interrupted_by_comments { 1 } else { 0 }))
-        )
-    }
-
-    /// Add a space or an indentation to the writer. If the current line has
-    /// been interrupted by comments, an indentation is added. Otherwise, a
-    /// space is added.
-    pub fn space_or_indent<T: std::fmt::Write>(&mut self, writer: &mut T) -> std::fmt::Result {
-        if !self.interrupted_by_comments {
-            write!(writer, "{}", SPACE)?;
-        } else {
-            self.indent(writer)?;
-        }
-        self.reset_interrupted();
-        Ok(())
-    }
-
-    /// Add a level of indentation.
-    pub fn increment_indent(&mut self) {
-        self.indent_level += 1;
-        self.reset_interrupted();
-    }
-
-    /// Remove a level of indentation.
-    pub fn decrement_indent(&mut self) {
-        self.indent_level = self.indent_level.saturating_sub(1);
-        self.reset_interrupted();
-    }
-
-    /// Check if the current line has been interrupted by comments.
-    pub fn interrupted(&self) -> bool {
-        self.interrupted_by_comments
-    }
-
-    /// Interrupt the current line with comments.
-    pub fn interrupt(&mut self) {
-        self.interrupted_by_comments = true;
-    }
-
-    /// Reset the interrupted state.
-    pub fn reset_interrupted(&mut self) {
-        self.interrupted_by_comments = false;
-    }
-
-    /// Write the comments that precede an element, indenting each one. If
-    /// doing so would interrupt the current line, a newline is written first
-    /// and the state is marked as interrupted. Blank lines in the trivia are
-    /// skipped unless `respect_blank_lines` is set.
-    pub fn format_preceding_trivia<F: std::fmt::Write>(
-        &mut self,
-        writer: &mut F,
-        comments: Box<[String]>,
-        would_be_interrupting: bool,
-        respect_blank_lines: bool,
-    ) -> std::fmt::Result {
-        if would_be_interrupting && !comments.is_empty() && !self.interrupted_by_comments {
-            write!(writer, "{}", NEWLINE)?;
-            self.interrupt();
-        }
-        for comment in comments {
-            if !respect_blank_lines && !comment.starts_with('#') {
-                continue;
-            }
-            self.indent(writer)?;
-            write!(writer, "{}{}", comment, NEWLINE)?;
-        }
-        Ok(())
-    }
-
-    /// Write an element's inline comment, if present. An interrupting comment
-    /// marks the state as interrupted; in the non-interrupting case, the
-    /// absence of a comment simply ends the line.
-    pub fn format_inline_comment<F: std::fmt::Write>(
-        &mut self,
-        writer: &mut F,
-        comment: Option<String>,
-        would_be_interrupting: bool,
-    ) -> std::fmt::Result {
-        if let Some(ref comment) = comment {
-            write!(writer, "{}{}{}", INLINE_COMMENT_SPACE, comment, NEWLINE)?;
-        }
-        if would_be_interrupting && comment.is_some() {
-            self.interrupt();
-        } else if !would_be_interrupting && comment.is_none() {
-            write!(writer, "{}", NEWLINE)?;
-        }
-        Ok(())
-    }
-}
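The removed `Formatter` above threads two pieces of state, the indent level and the comment-interruption flag, through every write. A minimal sketch of that contract, using a hypothetical `Line` element; the `Line` type and `demo` function are illustrative and were not part of the crate:

    use std::fmt::Write;

    // Hypothetical element: writes its text at the current indentation.
    struct Line(&'static str);

    impl Formattable for Line {
        fn format<T: Write>(&self, writer: &mut T, formatter: &mut Formatter) -> std::fmt::Result {
            formatter.indent(writer)?;
            write!(writer, "{}", self.0)?;
            // No inline comment here, so a plain newline ends the line.
            formatter.format_inline_comment(writer, None, false)
        }
    }

    // Usage: `Formatter::format` consumes the formatter and drives the element.
    fn demo() -> Result<(), std::fmt::Error> {
        let mut out = String::new();
        Formatter::default().format(&Line("version 1.2"), &mut out)?;
        assert_eq!(out, format!("version 1.2{NEWLINE}"));
        Ok(())
    }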
- pub fn reset_interrupted(&mut self) { - self.interrupted_by_comments = false; - } - - pub fn format_preceding_trivia( - &mut self, - writer: &mut F, - comments: Box<[String]>, - would_be_interrupting: bool, - respect_blank_lines: bool, - ) -> std::fmt::Result { - if would_be_interrupting && !comments.is_empty() && !self.interrupted_by_comments { - write!(writer, "{}", NEWLINE)?; - self.interrupt(); - } - for comment in comments { - if !respect_blank_lines && !comment.starts_with('#') { - continue; - } - self.indent(writer)?; - write!(writer, "{}{}", comment, NEWLINE)?; - } - Ok(()) - } - - pub fn format_inline_comment( - &mut self, - writer: &mut F, - comment: Option, - would_be_interrupting: bool, - ) -> std::fmt::Result { - if let Some(ref comment) = comment { - write!(writer, "{}{}{}", INLINE_COMMENT_SPACE, comment, NEWLINE)?; - } - if would_be_interrupting && comment.is_some() { - self.interrupt(); - } else if !would_be_interrupting && comment.is_none() { - write!(writer, "{}", NEWLINE)?; - } - Ok(()) - } -} diff --git a/backup/wdl-format-old-2/src/import.rs b/backup/wdl-format-old-2/src/import.rs deleted file mode 100644 index 5cc1cb96b..000000000 --- a/backup/wdl-format-old-2/src/import.rs +++ /dev/null @@ -1,174 +0,0 @@ -//! Format import statements. - -use wdl_ast::token_child; -use wdl_ast::v1::AliasKeyword; -use wdl_ast::v1::AsKeyword; -use wdl_ast::v1::ImportAlias; -use wdl_ast::v1::ImportKeyword; -use wdl_ast::v1::ImportStatement; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::Ident; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; -use wdl_grammar::SyntaxExt; - -use crate::Formattable; -use crate::Formatter; - -impl Formattable for ImportKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for AsKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for AliasKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for ImportAlias { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?; - - let alias_keyword = self.alias_keyword(); - formatter.space_or_indent(writer)?; - alias_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, alias_keyword.syntax().inline_comment(), true)?; - - let (source, target) = self.names(); - - formatter.format_preceding_trivia( - writer, - source.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - source.format(writer, formatter)?; - formatter.format_inline_comment(writer, source.syntax().inline_comment(), true)?; - - let as_keyword = self.as_keyword(); - formatter.format_preceding_trivia( - writer, - as_keyword.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - as_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?; - - formatter.format_preceding_trivia( - writer, - target.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - target.format(writer, formatter)?; - - formatter.format_inline_comment(writer, self.syntax().inline_comment(), true) - } 
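`sort_imports` above keys each statement on the pair (URI, namespace), which uniquely identifies an import. A small sketch of the intended effect, reusing only APIs that appear in the removed code (`Document::parse`, `as_v1`, `imports`, `uri`); the `sorted_uris` helper itself is hypothetical:

    use wdl_ast::Document;

    fn sorted_uris(source: &str) -> Vec<String> {
        let (document, diagnostics) = Document::parse(source);
        assert!(diagnostics.is_empty(), "example assumes a clean parse");
        let ast = document.ast();
        let ast = ast.as_v1().expect("document should be a v1 document");
        // Collect and order the imports exactly as the `Document` impl below did.
        let mut imports = ast.imports().collect::<Vec<_>>();
        imports.sort_by(sort_imports);
        imports
            .iter()
            .map(|import| {
                import
                    .uri()
                    .text()
                    .expect("import URI cannot have placeholders")
                    .as_str()
                    .to_string()
            })
            .collect()
    }

So imports of `"b.wdl"` and `"a.wdl"` come back in lexicographic URI order regardless of their order in the source document.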
diff --git a/backup/wdl-format-old-2/src/lib.rs b/backup/wdl-format-old-2/src/lib.rs
deleted file mode 100644
index 283a2c77f..000000000
--- a/backup/wdl-format-old-2/src/lib.rs
+++ /dev/null
@@ -1,190 +0,0 @@
-//! A library for auto-formatting WDL code.
-
-#![warn(missing_docs)]
-#![warn(rust_2018_idioms)]
-#![warn(rust_2021_compatibility)]
-#![warn(missing_debug_implementations)]
-#![warn(clippy::missing_docs_in_private_items)]
-#![warn(rustdoc::broken_intra_doc_links)]
-
-use anyhow::Result;
-use wdl_ast::token_child;
-use wdl_ast::v1::VersionKeyword;
-use wdl_ast::AstNode;
-use wdl_ast::AstToken;
-use wdl_ast::Diagnostic;
-use wdl_ast::Document;
-use wdl_ast::Ident;
-use wdl_ast::SyntaxElement;
-use wdl_ast::SyntaxKind;
-use wdl_ast::SyntaxNode;
-use wdl_ast::Validator;
-use wdl_ast::Version;
-use wdl_ast::VersionStatement;
-use wdl_grammar::SyntaxExt;
-
-mod formatter;
-mod import;
-// mod metadata;
-mod task;
-mod v1;
-mod workflow;
-
-use formatter::Formatter;
-
-/// Newline constant used for formatting on windows platforms.
-#[cfg(windows)]
-pub const NEWLINE: &str = "\r\n";
-/// Newline constant used for formatting on non-windows platforms.
-#[cfg(not(windows))]
-pub const NEWLINE: &str = "\n";
-/// String terminator constant used for formatting.
-const STRING_TERMINATOR: char = '"';
-/// Lint directive prefix constant used for formatting.
-const LINT_DIRECTIVE_PREFIX: &str = "#@";
-
-/// A trait for elements that can be formatted.
-pub trait Formattable {
-    /// Format the element and write it to the writer.
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result;
-}
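Everything below hangs off this trait: each AST element writes itself through a `Formatter`, and the crate's entry point, `format_document` (defined at the end of this file), composes parse, validate, and format. A usage sketch of that entry point; reporting via `Diagnostic`'s `Debug` output is an assumption here:

    fn main() {
        let source = "version 1.2\n\nworkflow hello {}\n";
        match format_document(source) {
            Ok(formatted) => print!("{formatted}"),
            Err(diagnostics) => {
                // Parsing, validation, or formatting failed; report each diagnostic.
                for diagnostic in diagnostics {
                    eprintln!("{diagnostic:?}");
                }
            }
        }
    }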
-
-impl Formattable for VersionKeyword {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        _formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.as_str())
-    }
-}
-
-impl Formattable for Version {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        _formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.as_str())
-    }
-}
-
-impl Formattable for VersionStatement {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        let mut preamble_comments = Vec::new();
-        let mut lint_directives = Vec::new();
-
-        for comment in self.syntax().preceding_trivia() {
-            if comment.starts_with(LINT_DIRECTIVE_PREFIX) {
-                lint_directives.push(comment);
-            } else if comment.starts_with('#') {
-                preamble_comments.push(comment);
-            } // else is just a newline
-        }
-
-        for comment in preamble_comments.iter() {
-            write!(writer, "{}{}", comment, NEWLINE)?;
-        }
-
-        // If there are preamble comments, ensure a blank line is inserted
-        if !preamble_comments.is_empty() {
-            write!(writer, "{}", NEWLINE)?;
-        }
-
-        for comment in lint_directives.iter() {
-            write!(writer, "{}{}", comment, NEWLINE)?;
-        }
-
-        let version_keyword = self.keyword();
-        version_keyword.format(writer, formatter)?;
-        formatter.format_inline_comment(writer, version_keyword.syntax().inline_comment(), true)?;
-
-        let version = self.version();
-        formatter.format_preceding_trivia(
-            writer,
-            version.syntax().preceding_trivia(),
-            true,
-            false,
-        )?;
-        formatter.space_or_indent(writer)?;
-        version.format(writer, formatter)?;
-        formatter.format_inline_comment(writer, self.syntax().inline_comment(), false)
-    }
-}
-
-impl Formattable for Ident {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        _formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.as_str())
-    }
-}
-
-impl Formattable for Document {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        let ast = self.ast();
-        let ast = ast.as_v1().expect("document should be a v1 document");
-        let version_statement = self
-            .version_statement()
-            .expect("document should have a version statement");
-        version_statement.format(writer, formatter)?;
-        let mut imports = ast.imports().collect::<Vec<_>>();
-        if !imports.is_empty() {
-            write!(writer, "{}", NEWLINE)?;
-        }
-        imports.sort_by(import::sort_imports);
-        for import in imports {
-            import.format(writer, formatter)?;
-        }
-        for item in ast.items() {
-            if item.syntax().kind() == SyntaxKind::ImportStatementNode {
-                continue;
-            }
-            // write!(writer, "{}", NEWLINE)?;
-            // item.format(writer, formatter)?;
-        }
-        Ok(())
-    }
-}
-
-/// Format a WDL document.
-pub fn format_document(code: &str) -> Result<String, Vec<Diagnostic>> {
-    let (document, diagnostics) = Document::parse(code);
-    if !diagnostics.is_empty() {
-        return Err(diagnostics);
-    }
-    let mut validator = Validator::default();
-    match validator.validate(&document) {
-        std::result::Result::Ok(_) => {
-            // The document is valid, so we can format it.
- } - Err(diagnostics) => return Err(diagnostics), - } - - let mut result = String::new(); - let formatter = &mut Formatter::default(); - - match formatter.format(&document, &mut result) { - Ok(_) => {} - Err(error) => { - let msg = format!("Failed to format document: {}", error); - return Err(vec![Diagnostic::error(msg)]); - } - } - - Ok(result) -} diff --git a/backup/wdl-format-old-2/src/metadata.rs b/backup/wdl-format-old-2/src/metadata.rs deleted file mode 100644 index 9bb557f29..000000000 --- a/backup/wdl-format-old-2/src/metadata.rs +++ /dev/null @@ -1,365 +0,0 @@ -//! A module for formatting metadata sections (meta and parameter_meta). - -use wdl_ast::v1::LiteralNull; -use wdl_ast::v1::MetadataArray; -use wdl_ast::v1::MetadataObject; -use wdl_ast::v1::MetadataObjectItem; -use wdl_ast::v1::MetadataSection; -use wdl_ast::v1::MetadataValue; -use wdl_ast::v1::ParameterMetadataSection; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; - -use super::comments::format_inline_comment; -use super::comments::format_preceding_comments; -use super::first_child_of_kind; -use super::format_element_with_comments; -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::LinePosition; -use super::NEWLINE; - -impl Formattable for LiteralNull { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) - } -} - -impl Formattable for MetadataObject { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - format_element_with_comments( - &open_brace, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - let mut commas = self - .syntax() - .children_with_tokens() - .filter(|c| c.kind() == SyntaxKind::Comma); - - for item in self.items() { - item.format(writer, formatter)?; - if let Some(cur_comma) = commas.next() { - format_element_with_comments( - &cur_comma, - writer, - formatter, - LinePosition::End, - |_, _| Ok(()), - )?; - } else { - // No trailing comma was in the input - write!(writer, ",")?; - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - format_preceding_comments(&close_brace, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_brace)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataArray { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let open_bracket = first_child_of_kind(self.syntax(), SyntaxKind::OpenBracket); - format_element_with_comments( - &open_bracket, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - let mut commas = self - .syntax() - .children_with_tokens() - 
.filter(|c| c.kind() == SyntaxKind::Comma); - - for item in self.elements() { - formatter.indent(writer)?; - item.format(writer, formatter)?; - if let Some(cur_comma) = commas.next() { - format_element_with_comments( - &cur_comma, - writer, - formatter, - LinePosition::End, - |_, _| Ok(()), - )?; - } else { - // No trailing comma was in the input - write!(writer, ",")?; - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_bracket = first_child_of_kind(self.syntax(), SyntaxKind::CloseBracket); - format_preceding_comments(&close_bracket, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_bracket)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataValue { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - MetadataValue::String(s) => s.format(writer, formatter), - MetadataValue::Boolean(b) => b.format(writer, formatter), - MetadataValue::Float(f) => f.format(writer, formatter), - MetadataValue::Integer(i) => i.format(writer, formatter), - MetadataValue::Null(n) => n.format(writer, formatter), - MetadataValue::Object(o) => o.format(writer, formatter), - MetadataValue::Array(a) => a.format(writer, formatter), - } - } -} - -impl Formattable for MetadataObjectItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let name = self.name(); - formatter.indent(writer)?; - name.format(writer, formatter)?; - format_inline_comment( - &SyntaxElement::from(name.syntax().clone()), - writer, - formatter, - true, - )?; - - let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - format_element_with_comments( - &colon, - writer, - formatter, - LinePosition::Middle, - |writer, formatter| { - if formatter.interrupted() { - formatter.indent(writer)?; - formatter.reset_interrupted(); - } - Ok(()) - }, - )?; - - let value = self.value(); - format_preceding_comments( - &SyntaxElement::from(value.syntax().clone()), - writer, - formatter, - true, - )?; - formatter.space_or_indent(writer)?; - value.format(writer, formatter)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let meta_keyword = first_child_of_kind(self.syntax(), SyntaxKind::MetaKeyword); - formatter.indent(writer)?; - write!(writer, "{}", meta_keyword)?; - format_inline_comment(&meta_keyword, writer, formatter, true)?; - - let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - format_element_with_comments( - &open_brace, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } else { - write!(writer, "{}", SPACE)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - for item in self.items() { - item.format(writer, formatter)?; - if formatter.interrupted() { - formatter.reset_interrupted(); - } else { - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_brace = 
first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - format_preceding_comments(&close_brace, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_brace)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - ) - } -} - -impl Formattable for ParameterMetadataSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let parameter_meta_keyword = - first_child_of_kind(self.syntax(), SyntaxKind::ParameterMetaKeyword); - formatter.indent(writer)?; - write!(writer, "{}", parameter_meta_keyword)?; - format_inline_comment(¶meter_meta_keyword, writer, formatter, true)?; - - let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - format_element_with_comments( - &open_brace, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } else { - write!(writer, "{}", SPACE)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - for item in self.items() { - item.format(writer, formatter)?; - if formatter.interrupted() { - formatter.reset_interrupted(); - } else { - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - format_preceding_comments(&close_brace, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_brace)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - ) - } -} diff --git a/backup/wdl-format-old-2/src/node.rs b/backup/wdl-format-old-2/src/node.rs deleted file mode 100644 index e44d720c2..000000000 --- a/backup/wdl-format-old-2/src/node.rs +++ /dev/null @@ -1,99 +0,0 @@ -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::Token; -use wdl_ast::WorkflowDescriptionLanguage; - -use crate::TokenStream; -use crate::Writable; - -type DynAstNode<'a> = &'a dyn AstNode; -type DynAstToken<'a> = &'a dyn AstToken; - -pub struct FormatNode<'a>(&'a dyn DynAstNode); - -impl<'a> FormatNode<'a> { - pub fn new + 'a>(value: &'a T) -> Self { - Self(value as DynAstNode) - } - - pub fn collate(&self) -> FormatElement<'_> {} -} - -pub trait AstNodeFormatExt: AstNode { - fn as_format_node(&self) -> FormatNode<'_> - where - Self: Sized, - { - FormatNode::new(self) - } -} - -impl> AstNodeFormatExt for T {} - -pub struct FormatToken<'a>(DynAstToken<'a>); - -impl<'a> FormatToken<'a> { - pub fn new(value: &'a T) -> Self { - Self(value as DynAstToken) - } -} - -pub trait AstTokenFormatExt: AstToken { - fn as_format_token(&self) -> FormatToken<'_> - where - Self: Sized, - { - FormatToken::new(self) - } -} - -impl AstTokenFormatExt for T {} - -impl<'a> Writable<'a> for FormatToken<'a> { - fn write(&self, stream: &mut TokenStream<'a>) { - stream.write(self.0.as_str()); - } -} - -pub enum FormatElement<'a> { - Node(FormatNode<'a>), - Token(FormatToken<'a>), -} - -impl From for FormatElement<'_> { - fn from(value: Token) -> Self {} -} - -#[cfg(test)] -mod tests { - use wdl_ast::Document; - - use crate::node::AstNodeFormatExt as _; - - #[test] - fn smoke() { - let (document, diagnostics) = Document::parse( - "version 1.2 - -# This is a comment attached to the task. -task foo # This is an inline comment. 
-{ - -} - -# This is a comment attached to the workflow. -workflow bar # This is inline with the workflow -{ - # This is attached to the call. - call foo {} -}", - ); - - assert!(diagnostics.is_empty()); - - let ast = document.ast(); - let ast = ast.as_v1().unwrap(); - let node = ast.as_format_node(); - } -} diff --git a/backup/wdl-format-old-2/src/registry.rs b/backup/wdl-format-old-2/src/registry.rs deleted file mode 100644 index 6a6b1cec8..000000000 --- a/backup/wdl-format-old-2/src/registry.rs +++ /dev/null @@ -1,373 +0,0 @@ -//! The format node registry. -//! -//! The format entity registry was introduced only to ensure that all nodes and -//! tokens in the concrete syntax tree have one and _only_ one analogous format -//! entity. -//! -//! The reason this is important to ensure statically is because this assumption -//! of one-to-one mapping between elements within the two types of trees is used -//! within formatting. For example, formatting works by traversing the CST of a -//! WDL document and attempting to cast a node to any format element. -//! -//! Furthermore, this is just a good invariant to uphold to ensure in general in -//! that the code remains straightforward to reason about (a CST element that -//! can map to multiple different formattable elements in different contexts is -//! inherently confusing). - -use std::any::type_name; -use std::collections::HashMap; -use std::sync::LazyLock; - -use wdl_ast::v1; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::Comment; -use wdl_ast::Ident; -use wdl_ast::SyntaxKind; -use wdl_ast::Version; -use wdl_ast::VersionStatement; -use wdl_ast::Whitespace; -use wdl_grammar::WorkflowDescriptionLanguage; -use wdl_grammar::ALL_SYNTAX_KIND; - -/// A private module for sealed traits. -/// -/// The traits are sealed because we want to ensure that we reserve the right to -/// implement them in the future unhindered without introducing breaking changes. -mod private { - /// The sealed trait for [`AstNodeRegistrant`](super::AstNodeRegistrant). - pub trait SealedNode {} - - /// The sealed trait for [`AstTokenRegistrant`](super::AstTokenRegistrant). - pub trait SealedToken {} -} - -/// A registry of all known mappings between format elements (individual Rust types -/// that implement the [`AstNode`] trait or [`AstToken`] trait) and the CST -/// elements they can be cast from (via [`SyntaxKind`]\(s)). -/// -/// This is useful for ensuring that AST elements have a one-to-one mapping with -/// CST element kinds. 
-static REGISTRY: LazyLock>> = LazyLock::new(|| { - let types = vec![ - Comment::register(), - Ident::register(), - v1::AccessExpr::register(), - v1::AdditionExpr::register(), - v1::AfterKeyword::register(), - v1::AliasKeyword::register(), - v1::ArrayType::register(), - v1::ArrayTypeKeyword::register(), - v1::AsKeyword::register(), - v1::Assignment::register(), - v1::Ast::register(), - v1::Asterisk::register(), - v1::BooleanTypeKeyword::register(), - v1::BoundDecl::register(), - v1::CallAfter::register(), - v1::CallAlias::register(), - v1::CallExpr::register(), - v1::CallInputItem::register(), - v1::CallKeyword::register(), - v1::CallStatement::register(), - v1::CallTarget::register(), - v1::CloseBrace::register(), - v1::CloseBracket::register(), - v1::CloseHeredoc::register(), - v1::CloseParen::register(), - v1::Colon::register(), - v1::Comma::register(), - v1::CommandKeyword::register(), - v1::CommandSection::register(), - v1::CommandText::register(), - v1::ConditionalStatement::register(), - v1::DefaultOption::register(), - v1::DirectoryTypeKeyword::register(), - v1::DivisionExpr::register(), - v1::Dot::register(), - v1::DoubleQuote::register(), - v1::ElseKeyword::register(), - v1::Equal::register(), - v1::EqualityExpr::register(), - v1::Exclamation::register(), - v1::Exponentiation::register(), - v1::ExponentiationExpr::register(), - v1::FalseKeyword::register(), - v1::FileTypeKeyword::register(), - v1::Float::register(), - v1::FloatTypeKeyword::register(), - v1::Greater::register(), - v1::GreaterEqual::register(), - v1::GreaterEqualExpr::register(), - v1::GreaterExpr::register(), - v1::HintsItem::register(), - v1::HintsKeyword::register(), - v1::HintsSection::register(), - v1::IfExpr::register(), - v1::IfKeyword::register(), - v1::ImportAlias::register(), - v1::ImportKeyword::register(), - v1::ImportStatement::register(), - v1::IndexExpr::register(), - v1::InequalityExpr::register(), - v1::InKeyword::register(), - v1::InputKeyword::register(), - v1::InputSection::register(), - v1::Integer::register(), - v1::IntTypeKeyword::register(), - v1::Less::register(), - v1::LessEqual::register(), - v1::LessEqualExpr::register(), - v1::LessExpr::register(), - v1::LiteralArray::register(), - v1::LiteralBoolean::register(), - v1::LiteralFloat::register(), - v1::LiteralHints::register(), - v1::LiteralHintsItem::register(), - v1::LiteralInput::register(), - v1::LiteralInputItem::register(), - v1::LiteralInteger::register(), - v1::LiteralMap::register(), - v1::LiteralMapItem::register(), - v1::LiteralNone::register(), - v1::LiteralNull::register(), - v1::LiteralObject::register(), - v1::LiteralObjectItem::register(), - v1::LiteralOutput::register(), - v1::LiteralOutputItem::register(), - v1::LiteralPair::register(), - v1::LiteralString::register(), - v1::LiteralStruct::register(), - v1::LiteralStructItem::register(), - v1::LogicalAnd::register(), - v1::LogicalAndExpr::register(), - v1::LogicalNotExpr::register(), - v1::LogicalOr::register(), - v1::LogicalOrExpr::register(), - v1::MapType::register(), - v1::MapTypeKeyword::register(), - v1::MetadataArray::register(), - v1::MetadataObject::register(), - v1::MetadataObjectItem::register(), - v1::MetadataSection::register(), - v1::MetaKeyword::register(), - v1::Minus::register(), - v1::ModuloExpr::register(), - v1::MultiplicationExpr::register(), - v1::NameRef::register(), - v1::NegationExpr::register(), - v1::NoneKeyword::register(), - v1::NotEqual::register(), - v1::NullKeyword::register(), - v1::ObjectKeyword::register(), - v1::ObjectType::register(), 
-        v1::ObjectTypeKeyword::register(),
-        v1::OpenBrace::register(),
-        v1::OpenBracket::register(),
-        v1::OpenHeredoc::register(),
-        v1::OpenParen::register(),
-        v1::OutputKeyword::register(),
-        v1::OutputSection::register(),
-        v1::PairType::register(),
-        v1::PairTypeKeyword::register(),
-        v1::ParameterMetadataSection::register(),
-        v1::ParameterMetaKeyword::register(),
-        v1::ParenthesizedExpr::register(),
-        v1::Percent::register(),
-        v1::Placeholder::register(),
-        v1::PlaceholderOpen::register(),
-        v1::Plus::register(),
-        v1::PrimitiveType::register(),
-        v1::QuestionMark::register(),
-        v1::RequirementsItem::register(),
-        v1::RequirementsKeyword::register(),
-        v1::RequirementsSection::register(),
-        v1::RuntimeItem::register(),
-        v1::RuntimeKeyword::register(),
-        v1::RuntimeSection::register(),
-        v1::ScatterKeyword::register(),
-        v1::ScatterStatement::register(),
-        v1::SepOption::register(),
-        v1::SingleQuote::register(),
-        v1::Slash::register(),
-        v1::StringText::register(),
-        v1::StringTypeKeyword::register(),
-        v1::StructDefinition::register(),
-        v1::StructKeyword::register(),
-        v1::SubtractionExpr::register(),
-        v1::TaskDefinition::register(),
-        v1::TaskKeyword::register(),
-        v1::ThenKeyword::register(),
-        v1::TrueFalseOption::register(),
-        v1::TrueKeyword::register(),
-        v1::TypeRef::register(),
-        v1::UnboundDecl::register(),
-        v1::Unknown::register(),
-        v1::VersionKeyword::register(),
-        v1::WorkflowDefinition::register(),
-        v1::WorkflowKeyword::register(),
-        Version::register(),
-        VersionStatement::register(),
-        Whitespace::register(),
-    ];
-
-    let mut result = HashMap::new();
-
-    // NOTE: this is done this way instead of simply collecting into a
-    // [`HashMap`] to ensure on the fly that no keys are duplicated.
-    for (r#type, kinds) in types {
-        if result.contains_key(&r#type) {
-            panic!("the `{:?}` key is duplicated", r#type);
-        }
-
-        result.insert(r#type, kinds);
-    }
-
-    result
-});
-
-/// Computes the inverse of the registry.
-///
-/// In other words, maps CST elements—dynamically typed as [`SyntaxKind`]s—to
-/// the corresponding AST element(s) that can cast from them.
-///
-/// This is useful for ensuring that AST elements have a one-to-one mapping with
-/// CST element kinds.
-fn inverse() -> HashMap<SyntaxKind, Box<[&'static str]>> {
-    let mut result = HashMap::<SyntaxKind, Vec<&'static str>>::new();
-
-    for (key, values) in REGISTRY.iter() {
-        for value in values.into_iter() {
-            result.entry(value.to_owned()).or_default().push(*key);
-        }
-    }
-
-    result
-        .into_iter()
-        .map(|(key, values)| (key, values.into_boxed_slice()))
-        .collect()
-}
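
Two properties of the deleted registry are worth making concrete: duplicate keys panic at construction time instead of being silently overwritten by a plain `collect()`, and the inverse mapping is what lets a test assert "exactly one type per kind". A minimal, self-contained sketch of both ideas follows; all names are stand-ins (here `u8` plays the role of `SyntaxKind`), not the crate's own API:

    use std::collections::HashMap;

    fn main() {
        // Stand-ins for the (type_name, castable kinds) pairs produced by
        // each `register()` call.
        let types: Vec<(&'static str, Vec<u8>)> =
            vec![("Comment", vec![0]), ("Ident", vec![1])];

        // Insert one entry at a time so a duplicated key panics instead of
        // being silently replaced, which `collect::<HashMap<_, _>>()` would do.
        let mut registry: HashMap<&'static str, Vec<u8>> = HashMap::new();
        for (name, kinds) in types {
            if registry.insert(name, kinds).is_some() {
                panic!("the `{name}` key is duplicated");
            }
        }

        // The inverse maps each kind back to the types that can cast from it;
        // a one-to-one registry yields exactly one type per kind.
        let mut inverse: HashMap<u8, Vec<&'static str>> = HashMap::new();
        for (&name, kinds) in &registry {
            for kind in kinds {
                inverse.entry(*kind).or_default().push(name);
            }
        }
        assert!(inverse.values().all(|names| names.len() == 1));
    }
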
-
-trait AstNodeRegistrant: private::SealedNode {
-    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST node type.
-    fn register() -> (&'static str, Box<[SyntaxKind]>);
-}
-
-impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> private::SealedNode for T {}
-
-impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> AstNodeRegistrant for T {
-    fn register() -> (&'static str, Box<[SyntaxKind]>) {
-        (
-            type_name::<T>(),
-            ALL_SYNTAX_KIND
-                .iter()
-                .filter(|kind| T::can_cast(**kind))
-                .cloned()
-                .collect::<Vec<_>>()
-                .into_boxed_slice(),
-        )
-    }
-}
-
-trait AstTokenRegistrant: private::SealedToken {
-    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST token type.
-    fn register() -> (&'static str, Box<[SyntaxKind]>);
-}
-
-impl<T: AstToken + 'static> private::SealedToken for T {}
-
-impl<T: AstToken + 'static> AstTokenRegistrant for T {
-    fn register() -> (&'static str, Box<[SyntaxKind]>) {
-        (
-            type_name::<T>(),
-            ALL_SYNTAX_KIND
-                .iter()
-                .filter(|kind| T::can_cast(**kind))
-                .cloned()
-                .collect::<Vec<_>>()
-                .into_boxed_slice(),
-        )
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// This test ensures there is a one-to-one mapping between CST elements
-    /// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement
-    /// the [`AstNode`] trait or the [`AstToken`] trait).
-    ///
-    /// The importance of this is described at the top of the module.
-    #[test]
-    fn ensure_one_to_one() {
-        let mut missing = Vec::new();
-        let mut multiple = Vec::new();
-
-        let inverse_registry = inverse();
-
-        for kind in ALL_SYNTAX_KIND {
-            // NOTE: these are symbolic elements and should not be included in
-            // the analysis here.
-            if kind.is_symbolic() {
-                continue;
-            }
-
-            match inverse_registry.get(kind) {
-                // SAFETY: because this is an inverse registry, only
-                // [`SyntaxKind`]s with at least one registered implementing
-                // type would be registered here. Thus, by design of the
-                // `inverse()` method, this will never occur.
-                Some(values) if values.is_empty() => {
-                    unreachable!("the inverse registry should never contain an empty array")
-                }
-                Some(values) if values.len() > 1 => multiple.push((kind, values)),
-                None => missing.push(kind),
-                // NOTE: this arm is reached only when the values exist and the
-                // length is 1; in that case, there is a one-to-one mapping,
-                // which is exactly what we want.
-                _ => {}
-            }
-        }
-
-        if !missing.is_empty() {
-            let mut missing = missing
-                .into_iter()
-                .map(|kind| format!("{:?}", kind))
-                .collect::<Vec<_>>();
-            missing.sort();
-
-            panic!(
-                "detected `SyntaxKind`s without an associated `AstNode`/`AstToken` (n={}): {}",
-                missing.len(),
-                missing.join(", ")
-            )
-        }
-
-        if !multiple.is_empty() {
-            multiple.sort();
-            let mut multiple = multiple
-                .into_iter()
-                .map(|(kind, types)| {
-                    let mut types = types.clone();
-                    types.sort();
-
-                    let mut result = format!("== {:?} ==", kind);
-                    for r#type in types {
-                        result.push_str("\n* ");
-                        result.push_str(r#type);
-                    }
-
-                    result
-                })
-                .collect::<Vec<_>>();
-            multiple.sort();
-
-            panic!(
-                "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s \
-                 (n={}):\n\n{}",
-                multiple.len(),
-                multiple.join("\n\n")
-            )
-        }
-    }
-}
diff --git a/backup/wdl-format-old-2/src/task.rs b/backup/wdl-format-old-2/src/task.rs
deleted file mode 100644
index 6e01b8843..000000000
--- a/backup/wdl-format-old-2/src/task.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-//! A module for formatting elements in tasks.
-
-use wdl_ast::v1::CommandPart;
-use wdl_ast::v1::CommandSection;
-use wdl_ast::v1::CommandText;
-use wdl_ast::v1::Decl;
-use wdl_ast::v1::RequirementsItem;
-use wdl_ast::v1::RequirementsSection;
-use wdl_ast::v1::RuntimeItem;
-use wdl_ast::v1::RuntimeSection;
-use wdl_ast::v1::TaskDefinition;
-use wdl_ast::v1::TaskItem;
-use wdl_ast::AstNode;
-use wdl_ast::AstToken;
-use wdl_ast::SyntaxElement;
-use wdl_ast::SyntaxKind;
-
-use super::formatter::SPACE;
-use super::Formattable;
-use super::Formatter;
-use super::NEWLINE;
-
-impl Formattable for CommandText {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        _formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.as_str())
-    }
-}
-
-impl Formattable for CommandSection {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let command_keyword = first_child_of_kind(self.syntax(), SyntaxKind::CommandKeyword);
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", command_keyword)?;
-        // format_inline_comment(&command_keyword, writer, formatter, true)?;
-
-        // // coerce all command sections to use heredoc ('<<<>>>') syntax
-        // // (as opposed to bracket ('{}') syntax)
-        // let open_section = if self.is_heredoc() {
-        //     first_child_of_kind(self.syntax(), SyntaxKind::OpenHeredoc)
-        // } else {
-        //     first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace)
-        // };
-        // format_preceding_comments(&open_section, writer, formatter, true)?;
-
-        // // Open braces should ignore the "+1 rule" followed by other interrupted
-        // // elements.
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // } else {
-        //     write!(writer, "{}", SPACE)?;
-        // }
-        // write!(writer, "<<<")?;
-
-        // for part in self.parts() {
-        //     match part {
-        //         CommandPart::Text(t) => {
-        //             t.format(writer, formatter)?;
-        //         }
-        //         CommandPart::Placeholder(p) => {
-        //             p.format(writer, formatter)?;
-        //         }
-        //     }
-        // }
-
-        // write!(writer, ">>>")?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
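
The commented-out body above normalizes every command section to heredoc (`<<<`...`>>>`) syntax. One hazard with that coercion is a brace-style body that itself contains `>>>`, which would break the heredoc terminator; a small defensive sketch of the idea (a hypothetical helper, not part of this crate):

    /// Coerce a brace-style command body to heredoc syntax, refusing bodies
    /// that would collide with the heredoc terminator.
    fn coerce_to_heredoc(body: &str) -> Option<String> {
        if body.contains(">>>") {
            return None;
        }
        Some(format!("command <<<{body}>>>"))
    }

    fn main() {
        assert_eq!(
            coerce_to_heredoc(" echo hello ").as_deref(),
            Some("command <<< echo hello >>>")
        );
        // A body containing the terminator cannot be coerced safely.
        assert_eq!(coerce_to_heredoc(" echo '>>>' "), None);
    }
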
-
-impl Formattable for RuntimeItem {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let name = self.name();
-        // formatter.indent(writer)?;
-        // name.format(writer, formatter)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(name.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-
-        // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon);
-        // format_preceding_comments(&colon, writer, formatter, true)?;
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // }
-        // write!(writer, "{}", colon)?;
-        // format_inline_comment(&colon, writer, formatter, true)?;
-
-        // let expr = self.expr();
-        // format_preceding_comments(
-        //     &SyntaxElement::from(expr.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-        // formatter.space_or_indent(writer)?;
-        // expr.format(writer, formatter)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
-
-impl Formattable for RuntimeSection {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let runtime_keyword = first_child_of_kind(self.syntax(), SyntaxKind::RuntimeKeyword);
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", runtime_keyword)?;
-        // format_inline_comment(&runtime_keyword, writer, formatter, true)?;
-
-        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
-        // format_preceding_comments(&open_brace, writer, formatter, true)?;
-        // // Open braces should ignore the "+1 rule" followed by other interrupted
-        // // elements.
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // } else {
-        //     write!(writer, "{}", SPACE)?;
-        // }
-        // write!(writer, "{}", open_brace)?;
-        // format_inline_comment(&open_brace, writer, formatter, false)?;
-
-        // formatter.increment_indent();
-
-        // for item in self.items() {
-        //     item.format(writer, formatter)?;
-        // }
-
-        // formatter.decrement_indent();
-
-        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
-        // format_preceding_comments(&close_brace, writer, formatter, true)?;
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", close_brace)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
-
-impl Formattable for RequirementsItem {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let name = self.name();
-        // formatter.indent(writer)?;
-        // name.format(writer, formatter)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(name.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-
-        // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon);
-        // format_preceding_comments(&colon, writer, formatter, true)?;
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // }
-        // write!(writer, "{}", colon)?;
-        // format_inline_comment(&colon, writer, formatter, true)?;
-
-        // let expr = self.expr();
-        // format_preceding_comments(
-        //     &SyntaxElement::from(expr.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-        // formatter.space_or_indent(writer)?;
-        // expr.format(writer, formatter)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
-
-impl Formattable for RequirementsSection {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let requirements_keyword = first_child_of_kind(self.syntax(), SyntaxKind::RequirementsKeyword);
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", requirements_keyword)?;
-        // format_inline_comment(&requirements_keyword, writer, formatter, true)?;
-
-        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
-        // format_preceding_comments(&open_brace, writer, formatter, true)?;
-        // // Open braces should ignore the "+1 rule" followed by other interrupted
-        // // elements.
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // } else {
-        //     write!(writer, "{}", SPACE)?;
-        // }
-        // write!(writer, "{}", open_brace)?;
-        // format_inline_comment(&open_brace, writer, formatter, false)?;
-
-        // formatter.increment_indent();
-
-        // for item in self.items() {
-        //     item.format(writer, formatter)?;
-        // }
-
-        // formatter.decrement_indent();
-
-        // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
-        // format_preceding_comments(&close_brace, writer, formatter, true)?;
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", close_brace)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
-
-impl Formattable for TaskDefinition {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )?;
-
-        // let task_keyword = first_child_of_kind(self.syntax(), SyntaxKind::TaskKeyword);
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", task_keyword)?;
-        // format_inline_comment(&task_keyword, writer, formatter, true)?;
-
-        // let name = self.name();
-        // format_preceding_comments(
-        //     &SyntaxElement::from(name.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-        // formatter.space_or_indent(writer)?;
-        // name.format(writer, formatter)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(name.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     true,
-        // )?;
-
-        // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
-        // format_preceding_comments(&open_brace, writer, formatter, true)?;
-        // // Open braces should ignore the "+1 rule" followed by other interrupted
-        // // elements.
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // let mut meta_section_str = String::new(); - // let mut parameter_meta_section_str = String::new(); - // let mut input_section_str = String::new(); - // let mut declaration_section_str = String::new(); - // let mut command_section_str = String::new(); - // let mut output_section_str = String::new(); - // let mut runtime_section_str = String::new(); - // let mut hints_section_str = String::new(); - // let mut requirements_section_str = String::new(); - - // for item in self.items() { - // match item { - // TaskItem::Metadata(m) => { - // m.format(&mut meta_section_str, formatter)?; - // } - // TaskItem::ParameterMetadata(pm) => { - // pm.format(&mut parameter_meta_section_str, formatter)?; - // } - // TaskItem::Input(i) => { - // i.format(&mut input_section_str, formatter)?; - // } - // TaskItem::Declaration(d) => { - // Decl::Bound(d).format(&mut declaration_section_str, formatter)?; - // } - // TaskItem::Command(c) => { - // c.format(&mut command_section_str, formatter)?; - // } - // TaskItem::Output(o) => { - // o.format(&mut output_section_str, formatter)?; - // } - // TaskItem::Runtime(r) => { - // r.format(&mut runtime_section_str, formatter)?; - // } - // TaskItem::Hints(h) => { - // h.format(&mut hints_section_str, formatter)?; - // } - // TaskItem::Requirements(r) => { - // r.format(&mut requirements_section_str, formatter)?; - // } - // } - // } - - // let mut first_section = true; - - // if !meta_section_str.is_empty() { - // first_section = false; - // write!(writer, "{}", meta_section_str)?; - // } - // if !parameter_meta_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", parameter_meta_section_str)?; - // } - // if !input_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", input_section_str)?; - // } - // if !declaration_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", declaration_section_str)?; - // } - // // Command section is required - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", command_section_str)?; - // if !output_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", output_section_str)?; - // } - // if !runtime_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", runtime_section_str)?; - // } - // if !hints_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", hints_section_str)?; - // } - // if !requirements_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", requirements_section_str)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, true)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} 
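
The commented-out `TaskDefinition` logic above buffers each section into its own `String` and then emits only the non-empty buffers in a fixed canonical order, separated by blank lines. The same strategy in isolation, simplified to a slice of pre-rendered sections (hypothetical helper, not this crate's API):

    /// Join pre-rendered sections in their given (canonical) order, skipping
    /// empty ones and separating neighbors with a blank line.
    fn emit_sections(sections: &[String]) -> String {
        let mut out = String::new();
        for section in sections.iter().filter(|s| !s.is_empty()) {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str(section);
        }
        out
    }

    fn main() {
        let sections = vec![
            String::from("meta {}\n"),
            String::new(), // an absent input section is skipped entirely
            String::from("command <<<>>>\n"),
        ];
        assert_eq!(emit_sections(&sections), "meta {}\n\ncommand <<<>>>\n");
    }
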
diff --git a/backup/wdl-format-old-2/src/token.rs b/backup/wdl-format-old-2/src/token.rs
deleted file mode 100644
index e87111c36..000000000
--- a/backup/wdl-format-old-2/src/token.rs
+++ /dev/null
@@ -1,82 +0,0 @@
-use std::borrow::Cow;
-
-use crate::Writable;
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Token<'a> {
-    Indent,
-    Dedent,
-    Literal(Cow<'a, str>),
-}
-
-impl<'a> From<&'a str> for Token<'a> {
-    fn from(value: &'a str) -> Self {
-        Token::Literal(Cow::Borrowed(value))
-    }
-}
-
-impl From<String> for Token<'_> {
-    fn from(value: String) -> Self {
-        Token::Literal(Cow::Owned(value))
-    }
-}
-
-#[derive(Debug, Default)]
-pub struct TokenStream<'a>(pub(crate) Vec<Token<'a>>);
-
-impl<'a> TokenStream<'a> {
-    pub fn indent(&mut self) {
-        self.0.push(Token::Indent);
-    }
-
-    pub fn dedent(&mut self) {
-        self.0.push(Token::Dedent);
-    }
-
-    pub fn write<W: Writable<'a> + 'a>(&mut self, value: W) {
-        value.write(self);
-    }
-
-    pub fn indented<F: FnMut(&mut Self)>(&mut self, mut f: F) {
-        // Indents the block.
-        self.indent();
-
-        // Runs the inner function.
-        f(self);
-
-        // Dedents the block.
-        self.dedent();
-    }
-
-    pub fn inner(&self) -> &Vec<Token<'a>> {
-        &self.0
-    }
-
-    pub fn into_inner(self) -> Vec<Token<'a>> {
-        self.0
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::borrow::Cow;
-
-    use super::*;
-
-    #[test]
-    fn smoke() {
-        let mut stream = TokenStream::default();
-        stream.indented(|stream| {
-            stream.write("Hello, world!");
-        });
-
-        assert_eq!(
-            stream.into_inner(),
-            vec![
-                Token::Indent,
-                Token::Literal(Cow::Owned("Hello, world!".to_string())),
-                Token::Dedent
-            ]
-        )
-    }
-}
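
`TokenStream` records layout as data: `Indent` and `Dedent` sit in the stream alongside the literals, and actual text rendering is deferred to whatever consumes the stream. A minimal consumer (hypothetical; the deleted crate's real renderer is not shown here) makes the three variants concrete:

    use std::borrow::Cow;

    #[derive(Debug, Eq, PartialEq)]
    enum Token<'a> {
        Indent,
        Dedent,
        Literal(Cow<'a, str>),
    }

    // Render a stream by tracking the current depth; each literal is written
    // on its own line at `depth` levels of four-space indentation.
    fn render(tokens: &[Token<'_>]) -> String {
        let mut depth = 0usize;
        let mut out = String::new();
        for token in tokens {
            match token {
                Token::Indent => depth += 1,
                Token::Dedent => depth = depth.saturating_sub(1),
                Token::Literal(text) => {
                    out.push_str(&"    ".repeat(depth));
                    out.push_str(text);
                    out.push('\n');
                }
            }
        }
        out
    }

    fn main() {
        let tokens = vec![
            Token::Literal(Cow::Borrowed("task hello {")),
            Token::Indent,
            Token::Literal(Cow::Borrowed("command <<<>>>")),
            Token::Dedent,
            Token::Literal(Cow::Borrowed("}")),
        ];
        assert_eq!(render(&tokens), "task hello {\n    command <<<>>>\n}\n");
    }
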
diff --git a/backup/wdl-format-old-2/src/tokens.rs b/backup/wdl-format-old-2/src/tokens.rs
deleted file mode 100644
index e3c525cac..000000000
--- a/backup/wdl-format-old-2/src/tokens.rs
+++ /dev/null
@@ -1,2089 +0,0 @@
-//! V1 AST tokens.
-
-use crate::AstToken;
-use crate::SyntaxKind;
-use crate::SyntaxToken;
-
-/// A token representing the `after` keyword.
-#[derive(Debug)]
-pub struct AfterKeyword(SyntaxToken);
-
-impl AstToken for AfterKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::AfterKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for AfterKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "after")
-    }
-}
-
-/// A token representing the `alias` keyword.
-#[derive(Debug)]
-pub struct AliasKeyword(SyntaxToken);
-
-impl AstToken for AliasKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::AliasKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for AliasKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "alias")
-    }
-}
-
-/// A token representing the `Array` type keyword.
-#[derive(Debug)]
-pub struct ArrayTypeKeyword(SyntaxToken);
-
-impl AstToken for ArrayTypeKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::ArrayTypeKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for ArrayTypeKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Array")
-    }
-}
-
-/// A token representing the `as` keyword.
-#[derive(Debug)]
-pub struct AsKeyword(SyntaxToken);
-
-impl AstToken for AsKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::AsKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for AsKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "as")
-    }
-}
-
-/// A token representing the `=` symbol.
-#[derive(Debug)]
-pub struct Assignment(SyntaxToken);
-
-impl AstToken for Assignment {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::Assignment)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Assignment {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "=")
-    }
-}
-
-/// A token representing the `*` symbol.
-#[derive(Debug)]
-pub struct Asterisk(SyntaxToken);
-
-impl AstToken for Asterisk {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::Asterisk)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Asterisk {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "*")
-    }
-}
-
-/// A token representing the `Boolean` keyword.
-#[derive(Debug)]
-pub struct BooleanTypeKeyword(SyntaxToken);
-
-impl AstToken for BooleanTypeKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::BooleanTypeKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for BooleanTypeKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Boolean")
-    }
-}
-
-/// A token representing the `call` keyword.
-#[derive(Debug)]
-pub struct CallKeyword(SyntaxToken);
-
-impl AstToken for CallKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::CallKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for CallKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "call")
-    }
-}
-
-/// A token representing the `}` symbol.
-#[derive(Debug)] -pub struct CloseBrace(SyntaxToken); - -impl AstToken for CloseBrace { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::CloseBrace) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for CloseBrace { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "}}") - } -} - -/// A token representing the `]` symbol. -#[derive(Debug)] -pub struct CloseBracket(SyntaxToken); - -impl AstToken for CloseBracket { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::CloseBracket) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for CloseBracket { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "]") - } -} - -/// A token representing the `>>>` token. -#[derive(Debug)] -pub struct CloseHeredoc(SyntaxToken); - -impl AstToken for CloseHeredoc { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::CloseHeredoc) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for CloseHeredoc { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ">>>") - } -} - -/// A token representing the `)` symbol. -#[derive(Debug)] -pub struct CloseParen(SyntaxToken); - -impl AstToken for CloseParen { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::CloseParen) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for CloseParen { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ")") - } -} - -/// A token representing the `:` symbol. -#[derive(Debug)] -pub struct Colon(SyntaxToken); - -impl AstToken for Colon { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Colon) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Colon { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ":") - } -} - -/// A token representing the `,` symbol. -#[derive(Debug)] -pub struct Comma(SyntaxToken); - -impl AstToken for Comma { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Comma) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Comma { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ",") - } -} - -/// A token representing the `command` keyword. 
-#[derive(Debug)] -pub struct CommandKeyword(SyntaxToken); - -impl AstToken for CommandKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::CommandKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for CommandKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "command") - } -} - -/// A token representing the `Directory` type keyword. -#[derive(Debug)] -pub struct DirectoryTypeKeyword(SyntaxToken); - -impl AstToken for DirectoryTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::DirectoryTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for DirectoryTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Directory") - } -} - -/// A token representing the `.` symbol. -#[derive(Debug)] -pub struct Dot(SyntaxToken); - -impl AstToken for Dot { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Dot) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Dot { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ".") - } -} - -/// A token representing the `"` symbol. -#[derive(Debug)] -pub struct DoubleQuote(SyntaxToken); - -impl AstToken for DoubleQuote { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::DoubleQuote) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for DoubleQuote { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, r#"""#) - } -} - -/// A token representing the `else` keyword. -#[derive(Debug)] -pub struct ElseKeyword(SyntaxToken); - -impl AstToken for ElseKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::ElseKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for ElseKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "else") - } -} - -/// A token representing the `==` symbol. -#[derive(Debug)] -pub struct Equal(SyntaxToken); - -impl AstToken for Equal { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Equal) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Equal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "=") - } -} - -/// A token representing the `!` symbol. 
-#[derive(Debug)] -pub struct Exclamation(SyntaxToken); - -impl AstToken for Exclamation { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Exclamation) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Exclamation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "!") - } -} - -/// A token representing the `**` keyword. -#[derive(Debug)] -pub struct Exponentiation(SyntaxToken); - -impl AstToken for Exponentiation { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Exponentiation) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Exponentiation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "**") - } -} - -/// A token representing the `false` keyword. -#[derive(Debug)] -pub struct FalseKeyword(SyntaxToken); - -impl AstToken for FalseKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::FalseKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for FalseKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "false") - } -} - -/// A token representing the `File` type keyword. -#[derive(Debug)] -pub struct FileTypeKeyword(SyntaxToken); - -impl AstToken for FileTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::FileTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for FileTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "File") - } -} - -/// A token representing the `Float` type keyword. -#[derive(Debug)] -pub struct FloatTypeKeyword(SyntaxToken); - -impl AstToken for FloatTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::FloatTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for FloatTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Float") - } -} - -/// A token representing the `>` symbol. -#[derive(Debug)] -pub struct Greater(SyntaxToken); - -impl AstToken for Greater { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Greater) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Greater { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ">") - } -} - -/// A token representing the `>=` symbol. 
-#[derive(Debug)] -pub struct GreaterEqual(SyntaxToken); - -impl AstToken for GreaterEqual { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::GreaterEqual) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for GreaterEqual { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, ">=") - } -} - -/// A token representing the `hints` keyword. -#[derive(Debug)] -pub struct HintsKeyword(SyntaxToken); - -impl AstToken for HintsKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::HintsKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for HintsKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "hints") - } -} - -/// A token representing the `if` keyword. -#[derive(Debug)] -pub struct IfKeyword(SyntaxToken); - -impl AstToken for IfKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::IfKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for IfKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "if") - } -} - -/// A token representing the `import` keyword. -#[derive(Debug)] -pub struct ImportKeyword(SyntaxToken); - -impl AstToken for ImportKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::ImportKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for ImportKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "import") - } -} - -/// A token representing the `in` keyword. -#[derive(Debug)] -pub struct InKeyword(SyntaxToken); - -impl AstToken for InKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::InKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for InKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "in") - } -} - -/// A token representing the `input` keyword. -#[derive(Debug)] -pub struct InputKeyword(SyntaxToken); - -impl AstToken for InputKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::InputKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for InputKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "input") - } -} - -/// A token representing the `Int` type keyword. 
-#[derive(Debug)] -pub struct IntTypeKeyword(SyntaxToken); - -impl AstToken for IntTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::IntTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for IntTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Int") - } -} - -/// A token representing the `<` symbol. -#[derive(Debug)] -pub struct Less(SyntaxToken); - -impl AstToken for Less { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Less) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Less { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "<") - } -} - -/// A token representing the `<=` symbol. -#[derive(Debug)] -pub struct LessEqual(SyntaxToken); - -impl AstToken for LessEqual { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::LessEqual) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for LessEqual { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "<=") - } -} - -/// A token representing the `&&` symbol. -#[derive(Debug)] -pub struct LogicalAnd(SyntaxToken); - -impl AstToken for LogicalAnd { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::LogicalAnd) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for LogicalAnd { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "&&") - } -} - -/// A token representing the `||` symbol. -#[derive(Debug)] -pub struct LogicalOr(SyntaxToken); - -impl AstToken for LogicalOr { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::LogicalOr) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for LogicalOr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "||") - } -} - -/// A token representing the `Map` type keyword. -#[derive(Debug)] -pub struct MapTypeKeyword(SyntaxToken); - -impl AstToken for MapTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::MapTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for MapTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Map") - } -} - -/// A token representing the `meta` keyword. 
-#[derive(Debug)] -pub struct MetaKeyword(SyntaxToken); - -impl AstToken for MetaKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::MetaKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for MetaKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "meta") - } -} - -/// A token representing the `-` symbol. -#[derive(Debug)] -pub struct Minus(SyntaxToken); - -impl AstToken for Minus { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Minus) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Minus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "-") - } -} - -/// A token representing the `None` keyword. -#[derive(Debug)] -pub struct NoneKeyword(SyntaxToken); - -impl AstToken for NoneKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::NoneKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for NoneKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "None") - } -} - -/// A token representing the `!=` symbol. -#[derive(Debug)] -pub struct NotEqual(SyntaxToken); - -impl AstToken for NotEqual { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::NotEqual) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for NotEqual { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "!=") - } -} - -/// A token representing the `null` keyword. -#[derive(Debug)] -pub struct NullKeyword(SyntaxToken); - -impl AstToken for NullKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::NullKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for NullKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "null") - } -} - -/// A token representing the `object` keyword. -#[derive(Debug)] -pub struct ObjectKeyword(SyntaxToken); - -impl AstToken for ObjectKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::ObjectKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for ObjectKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "object") - } -} - -/// A token representing the `Object` type keyword. 
-#[derive(Debug)] -pub struct ObjectTypeKeyword(SyntaxToken); - -impl AstToken for ObjectTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::ObjectTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for ObjectTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Object") - } -} - -/// A token representing the `{` symbol. -#[derive(Debug)] -pub struct OpenBrace(SyntaxToken); - -impl AstToken for OpenBrace { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::OpenBrace) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for OpenBrace { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{{") - } -} - -/// A token representing the `[` symbol. -#[derive(Debug)] -pub struct OpenBracket(SyntaxToken); - -impl AstToken for OpenBracket { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::OpenBracket) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for OpenBracket { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[") - } -} - -/// A token representing the `<<<` symbol. -#[derive(Debug)] -pub struct OpenHeredoc(SyntaxToken); - -impl AstToken for OpenHeredoc { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::OpenHeredoc) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for OpenHeredoc { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "<<<") - } -} - -/// A token representing the `(` keyword. -#[derive(Debug)] -pub struct OpenParen(SyntaxToken); - -impl AstToken for OpenParen { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::OpenParen) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for OpenParen { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "(") - } -} - -/// A token representing the `output` keyword. -#[derive(Debug)] -pub struct OutputKeyword(SyntaxToken); - -impl AstToken for OutputKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::OutputKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for OutputKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "output") - } -} - -/// A token representing the `Pair` type keyword. 
-#[derive(Debug)] -pub struct PairTypeKeyword(SyntaxToken); - -impl AstToken for PairTypeKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::PairTypeKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for PairTypeKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Pair") - } -} - -/// A token representing the `parameter_meta` keyword. -#[derive(Debug)] -pub struct ParameterMetaKeyword(SyntaxToken); - -impl AstToken for ParameterMetaKeyword { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::ParameterMetaKeyword) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for ParameterMetaKeyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "parameter_meta") - } -} - -/// A token representing the `%` symbol. -#[derive(Debug)] -pub struct Percent(SyntaxToken); - -impl AstToken for Percent { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Percent) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Percent { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "%") - } -} - -/// Represents one of the placeholder open symbols. -#[derive(Debug)] -pub struct PlaceholderOpen(SyntaxToken); - -impl AstToken for PlaceholderOpen { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::PlaceholderOpen) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for PlaceholderOpen { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // NOTE: this is deferred to the entire underlying string simply because - // we cannot known a priori what the captured text is. - write!(f, "{}", self.0) - } -} - -/// A token representing the `+` symbol. -#[derive(Debug)] -pub struct Plus(SyntaxToken); - -impl AstToken for Plus { - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!(kind, SyntaxKind::Plus) - } - - fn cast(syntax: SyntaxToken) -> Option { - if Self::can_cast(syntax.kind()) { - return Some(Self(syntax)); - } - None - } - - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - -impl std::fmt::Display for Plus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "+") - } -} - -/// A token representing the `?` symbol. 
-#[derive(Debug)]
-pub struct QuestionMark(SyntaxToken);
-
-impl AstToken for QuestionMark {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::QuestionMark)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for QuestionMark {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "?")
-    }
-}
-
-/// A token representing the `requirements` keyword.
-#[derive(Debug)]
-pub struct RequirementsKeyword(SyntaxToken);
-
-impl AstToken for RequirementsKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::RequirementsKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for RequirementsKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "requirements")
-    }
-}
-
-/// A token representing the `runtime` keyword.
-#[derive(Debug)]
-pub struct RuntimeKeyword(SyntaxToken);
-
-impl AstToken for RuntimeKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::RuntimeKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for RuntimeKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "runtime")
-    }
-}
-
-/// A token representing the `scatter` keyword.
-#[derive(Debug)]
-pub struct ScatterKeyword(SyntaxToken);
-
-impl AstToken for ScatterKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::ScatterKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for ScatterKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "scatter")
-    }
-}
-
-/// A token representing the `'` symbol.
-#[derive(Debug)]
-pub struct SingleQuote(SyntaxToken);
-
-impl AstToken for SingleQuote {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::SingleQuote)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for SingleQuote {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "'")
-    }
-}
-
-/// A token representing the `/` symbol.
-#[derive(Debug)]
-pub struct Slash(SyntaxToken);
-
-impl AstToken for Slash {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::Slash)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Slash {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "/")
-    }
-}
-
-/// A token representing the `String` type keyword.
-#[derive(Debug)]
-pub struct StringTypeKeyword(SyntaxToken);
-
-impl AstToken for StringTypeKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::StringTypeKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for StringTypeKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "String")
-    }
-}
-
-/// A token representing the `struct` keyword.
-#[derive(Debug)]
-pub struct StructKeyword(SyntaxToken);
-
-impl AstToken for StructKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::StructKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for StructKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "struct")
-    }
-}
-
-/// A token representing the `task` keyword.
-#[derive(Debug)]
-pub struct TaskKeyword(SyntaxToken);
-
-impl AstToken for TaskKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::TaskKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for TaskKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "task")
-    }
-}
-
-/// A token representing the `then` keyword.
-#[derive(Debug)]
-pub struct ThenKeyword(SyntaxToken);
-
-impl AstToken for ThenKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::ThenKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for ThenKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "then")
-    }
-}
-
-/// A token representing the `true` keyword.
-#[derive(Debug)]
-pub struct TrueKeyword(SyntaxToken);
-
-impl AstToken for TrueKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::TrueKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for TrueKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "true")
-    }
-}
-
-/// A token representing unknown contents within a WDL document.
-#[derive(Debug)]
-pub struct Unknown(SyntaxToken);
-
-impl AstToken for Unknown {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::Unknown)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Unknown {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // NOTE: this is deferred to the entire underlying string simply because
-        // we cannot know a priori what the captured text is.
-        write!(f, "{}", self.0)
-    }
-}
-
-/// A token representing the `version` keyword.
-#[derive(Debug)]
-pub struct VersionKeyword(SyntaxToken);
-
-impl AstToken for VersionKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::VersionKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for VersionKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "version")
-    }
-}
-
-/// A token representing the `workflow` keyword.
-#[derive(Debug)]
-pub struct WorkflowKeyword(SyntaxToken);
-
-impl AstToken for WorkflowKeyword {
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(kind, SyntaxKind::WorkflowKeyword)
-    }
-
-    fn cast(syntax: SyntaxToken) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            return Some(Self(syntax));
-        }
-        None
-    }
-
-    fn syntax(&self) -> &SyntaxToken {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for WorkflowKeyword {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "workflow")
-    }
-}
diff --git a/backup/wdl-format-old-2/src/v1.rs b/backup/wdl-format-old-2/src/v1.rs
deleted file mode 100644
index 561a58ebc..000000000
--- a/backup/wdl-format-old-2/src/v1.rs
+++ /dev/null
@@ -1,711 +0,0 @@
-//! A module for formatting WDL v1 elements.
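Every wrapper in the deleted `tokens.rs` above follows the same `AstToken` contract: `can_cast` gates on a single `SyntaxKind`, `cast` wraps a `SyntaxToken` of that kind, and `syntax` returns the wrapped token. A minimal sketch of how a consumer drives that contract, assuming the `wdl_ast` re-exports used elsewhere in this patch (`find_version_keyword` and `node` are illustrative, not part of the crate):

    use wdl_ast::v1::VersionKeyword;
    use wdl_ast::AstToken;
    use wdl_ast::SyntaxNode;

    /// Returns the first `version` keyword token under `node`, if any.
    fn find_version_keyword(node: &SyntaxNode) -> Option<VersionKeyword> {
        node.children_with_tokens()
            // keep only tokens, dropping child nodes
            .filter_map(|element| element.into_token())
            // `cast` succeeds exactly when `can_cast(element.kind())` holds
            .find_map(VersionKeyword::cast)
    }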
- -use std::fmt::Write; - -use wdl_ast::v1::Decl; -use wdl_ast::v1::DefaultOption; -use wdl_ast::v1::DocumentItem; -use wdl_ast::v1::Expr; -use wdl_ast::v1::HintsItem; -use wdl_ast::v1::HintsSection; -use wdl_ast::v1::InputSection; -use wdl_ast::v1::LiteralBoolean; -use wdl_ast::v1::LiteralFloat; -use wdl_ast::v1::LiteralInteger; -use wdl_ast::v1::LiteralString; -use wdl_ast::v1::OutputSection; -use wdl_ast::v1::Placeholder; -use wdl_ast::v1::PlaceholderOption; -use wdl_ast::v1::SepOption; -use wdl_ast::v1::StringPart; -use wdl_ast::v1::StringText; -use wdl_ast::v1::StructDefinition; -use wdl_ast::v1::StructKeyword; -use wdl_ast::v1::TrueFalseOption; -use wdl_ast::v1::Type; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; -use wdl_grammar::SyntaxExt; - -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::NEWLINE; -use super::STRING_TERMINATOR; - -impl Formattable for DefaultOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let default_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); - // format_preceding_comments(&default_word, writer, formatter, true)?; - // write!(writer, "{}", default_word)?; - // format_inline_comment(&default_word, writer, formatter, true)?; - - // let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment); - // format_preceding_comments(&assignment, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // let value = self.value(); - // format_preceding_comments( - // &SyntaxElement::from(value.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // value.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(value.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for SepOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let sep_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); - // format_preceding_comments(&sep_word, writer, formatter, true)?; - // write!(writer, "{}", sep_word)?; - // format_inline_comment(&sep_word, writer, formatter, true)?; - - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // let separator = self.separator(); - // format_preceding_comments( - // &SyntaxElement::from(separator.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // separator.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(separator.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for TrueFalseOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let mut true_clause = String::new(); - // let mut false_clause = String::new(); - // let mut which_clause = None; - // for child in self.syntax().children_with_tokens() { - // match child.kind() { - // SyntaxKind::TrueKeyword => { - // which_clause = Some(true); - - // 
format_preceding_comments(&child, &mut true_clause, formatter, - // true)?; write!(true_clause, "{}", child)?; - // format_inline_comment(&child, &mut true_clause, formatter, - // true)?; } - // SyntaxKind::FalseKeyword => { - // which_clause = Some(false); - - // format_preceding_comments(&child, &mut false_clause, formatter, - // true)?; write!(false_clause, "{}", child)?; - // format_inline_comment(&child, &mut false_clause, formatter, - // true)?; } - // SyntaxKind::Assignment => { - // let cur_clause = match which_clause { - // Some(true) => &mut true_clause, - // Some(false) => &mut false_clause, - // _ => unreachable!( - // "should have found a true or false keyword before an - // assignment" ), - // }; - - // format_preceding_comments(&child, cur_clause, formatter, true)?; - // formatter.space_or_indent(cur_clause)?; - // write!(cur_clause, "{}", child)?; - // format_inline_comment(&child, cur_clause, formatter, true)?; - // } - // SyntaxKind::LiteralStringNode => { - // let cur_clause = match which_clause { - // Some(true) => &mut true_clause, - // Some(false) => &mut false_clause, - // _ => unreachable!( - // "should have found a true or false keyword before a - // string" ), - // }; - - // format_preceding_comments(&child, cur_clause, formatter, true)?; - // formatter.space_or_indent(cur_clause)?; - // let literal_string = LiteralString::cast( - // child - // .as_node() - // .expect("LiteralStringNode should be a node") - // .clone(), - // ) - // .expect("LiteralStringNode should cast to a LiteralString"); - // literal_string.format(cur_clause, formatter)?; - // format_inline_comment(&child, writer, formatter, true)?; - // } - // SyntaxKind::Whitespace => { - // // Ignore - // } - // SyntaxKind::Comment => { - // // Handled by a call to `format_preceding_comments` - // // or `format_inline_comment` in another match arm. 
-        //         }
-        //         _ => {
-        //             unreachable!("Unexpected syntax kind: {:?}", child.kind());
-        //         }
-        //     }
-        // }
-        // write!(writer, "{} {}", true_clause, false_clause)?;
-
-        Ok(())
-    }
-}
-
-impl Formattable for PlaceholderOption {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        match self {
-            PlaceholderOption::Default(default) => default.format(writer, formatter),
-            PlaceholderOption::Sep(sep) => sep.format(writer, formatter),
-            PlaceholderOption::TrueFalse(true_false) => true_false.format(writer, formatter),
-        }
-    }
-}
-
-impl Formattable for Placeholder {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // coerce all placeholders into '~{}' placeholders
-        // (as opposed to '${}' placeholders)
-        write!(writer, "~{{")?;
-
-        let mut option_present = false;
-        if let Some(option) = self.options().next() {
-            option.format(writer, formatter)?;
-            option_present = true;
-        }
-
-        let expr = self.expr();
-        if option_present {
-            formatter.space_or_indent(writer)?;
-        }
-        expr.format(writer, formatter)?;
-
-        write!(writer, "}}")
-    }
-}
-
-impl Formattable for StringText {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        let mut iter = self.as_str().chars().peekable();
-        let mut prev_c = None;
-        while let Some(c) = iter.next() {
-            match c {
-                '\\' => {
-                    if let Some(next_c) = iter.peek() {
-                        if *next_c == '\'' {
-                            // Do not write this backslash
-                            prev_c = Some(c);
-                            continue;
-                        }
-                    }
-                    writer.write_char(c)?;
-                }
-                '"' => {
-                    if let Some(pc) = prev_c {
-                        if pc != '\\' {
-                            writer.write_char('\\')?;
-                        }
-                    }
-                    writer.write_char(c)?;
-                }
-                _ => {
-                    writer.write_char(c)?;
-                }
-            }
-            prev_c = Some(c);
-        }
-
-        Ok(())
-    }
-}
-
-impl Formattable for LiteralString {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", STRING_TERMINATOR)?;
-        for part in self.parts() {
-            match part {
-                StringPart::Text(text) => {
-                    text.format(writer, formatter)?;
-                }
-                StringPart::Placeholder(placeholder) => {
-                    placeholder.format(writer, formatter)?;
-                }
-            }
-        }
-        write!(writer, "{}", STRING_TERMINATOR)
-    }
-}
-
-impl Formattable for LiteralBoolean {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.value()) // TODO
-    }
-}
-
-impl Formattable for LiteralFloat {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.syntax()) // TODO
-    }
-}
-
-impl Formattable for LiteralInteger {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.syntax()) // TODO
-    }
-}
-
-impl Formattable for Type {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.syntax()) // TODO
-    }
-}
-
-impl Formattable for Expr {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        _state: &mut Formatter,
-    ) -> std::fmt::Result {
-        write!(writer, "{}", self.syntax()) // TODO
-    }
-}
-
-impl Formattable for Decl {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        // format_preceding_comments(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
- // formatter, - // true, - // )?; - - // let name = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(expr) = self.expr() { - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // } - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for InputSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let input_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::InputKeyword); formatter.indent(writer)?; - // write!(writer, "{}", input_keyword)?; - // format_inline_comment(&input_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for decl in self.declarations() { - // decl.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for OutputSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let output_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::OutputKeyword); formatter.indent(writer)?; - // write!(writer, "{}", output_keyword)?; - // format_inline_comment(&output_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for decl in self.declarations() { - // Decl::Bound(decl).format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for HintsItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let name = self.name(); - // formatter.indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for HintsSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let hints_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::HintsKeyword); formatter.indent(writer)?; - // write!(writer, "{}", hints_keyword)?; - // format_inline_comment(&hints_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for item in self.items() { - // item.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for StructKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for StructDefinition { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), false, true)?; - - let struct_keyword = self.keyword(); - struct_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, struct_keyword.syntax().inline_comment(), true)?; - - let name = self.name(); - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?; - formatter.space_or_indent(writer)?; - name.format(writer, formatter)?; - formatter.format_inline_comment(writer, name.syntax().inline_comment(), true)?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
-        // if formatter.interrupted() {
-        //     formatter.reset_interrupted();
-        //     formatter.indent(writer)?;
-        // } else {
-        //     write!(writer, "{}", SPACE)?;
-        // }
-        // write!(writer, "{}", open_brace)?;
-        // format_inline_comment(&open_brace, writer, formatter, false)?;
-
-        // formatter.increment_indent();
-
-        // if let Some(m) = self.metadata().next() {
-        //     m.format(writer, formatter)?;
-        //     write!(writer, "{}", NEWLINE)?;
-        // }
-
-        // if let Some(pm) = self.parameter_metadata().next() {
-        //     pm.format(writer, formatter)?;
-        //     write!(writer, "{}", NEWLINE)?;
-        // }
-
-        // for decl in self.members() {
-        //     Decl::Unbound(decl).format(writer, formatter)?;
-        // }
-
-        // formatter.decrement_indent();
-
-        // let close_brace = self
-        //     .syntax()
-        //     .children_with_tokens()
-        //     .find(|element| element.kind() == SyntaxKind::CloseBrace)
-        //     .expect("StructDefinition should have a close brace");
-        // format_preceding_comments(&close_brace, writer, formatter, false)?;
-        // formatter.indent(writer)?;
-        // write!(writer, "{}", close_brace)?;
-        // format_inline_comment(
-        //     &SyntaxElement::from(self.syntax().clone()),
-        //     writer,
-        //     formatter,
-        //     false,
-        // )
-        Ok(())
-    }
-}
-
-impl Formattable for DocumentItem {
-    fn format<T: Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        match self {
-            DocumentItem::Import(_) => {
-                unreachable!("Import statements should not be formatted as a DocumentItem")
-            }
-            DocumentItem::Workflow(workflow) => workflow.format(writer, formatter),
-            DocumentItem::Task(task) => task.format(writer, formatter),
-            DocumentItem::Struct(structure) => structure.format(writer, formatter),
-        }
-    }
-}
diff --git a/backup/wdl-format-old-2/src/workflow.rs b/backup/wdl-format-old-2/src/workflow.rs
deleted file mode 100644
index f82e57003..000000000
--- a/backup/wdl-format-old-2/src/workflow.rs
+++ /dev/null
@@ -1,666 +0,0 @@
-//! A module for formatting elements in workflows.
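Most of the v1 impls above are commented out, but the placeholder and string paths still do real work: `Placeholder::format` re-emits every placeholder in the `~{...}` form (never `${...}`), separating an option from its expression with a space or indent, and `StringText::format` normalizes quote escaping (a backslash escaping `'` is dropped; an unescaped `"` gains one). A string-level sketch of the placeholder rule, hedged because the real code walks AST nodes rather than strings (`write_placeholder` is illustrative only):

    /// Mirrors the coercion in `Placeholder::format` above: always open with
    /// `~{`, emit any option before the expression, and close with `}`.
    fn write_placeholder(option: Option<&str>, expr: &str) -> String {
        let mut out = String::from("~{"); // never `${`, regardless of input style
        if let Some(opt) = option {
            out.push_str(opt);
            out.push(' '); // stands in for `formatter.space_or_indent`
        }
        out.push_str(expr);
        out.push('}');
        out
    }

For example, `write_placeholder(Some("sep=\", \""), "numbers")` yields `~{sep=", " numbers}`, which is how a `${sep=", " numbers}` placeholder in the source would be re-emitted.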
- -use wdl_ast::v1::CallAfter; -use wdl_ast::v1::CallAlias; -use wdl_ast::v1::CallInputItem; -use wdl_ast::v1::CallStatement; -use wdl_ast::v1::ConditionalStatement; -use wdl_ast::v1::Decl; -use wdl_ast::v1::ScatterStatement; -use wdl_ast::v1::WorkflowDefinition; -use wdl_ast::v1::WorkflowItem; -use wdl_ast::v1::WorkflowStatement; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; - -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::NEWLINE; - -impl Formattable for CallAlias { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let as_keyword = first_child_of_kind(self.syntax(), SyntaxKind::AsKeyword); - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", as_keyword)?; - // format_inline_comment(&as_keyword, writer, formatter, true)?; - - // let ident = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallAfter { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let after_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::AfterKeyword); formatter.space_or_indent(writer)?; - // write!(writer, "{}", after_keyword)?; - // format_inline_comment(&after_keyword, writer, formatter, true)?; - - // let ident = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallInputItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let name = self.name(); - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(expr) = self.expr() { - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // } - - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), 
- // writer, - // formatter, - // false, - // )?; - - // let call_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::CallKeyword); formatter.indent(writer)?; - // write!(writer, "{}", call_keyword)?; - // format_inline_comment(&call_keyword, writer, formatter, true)?; - - // let target = self.target(); - // format_preceding_comments( - // &SyntaxElement::Node(target.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", target.syntax())?; - // format_inline_comment( - // &SyntaxElement::Node(target.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(alias) = self.alias() { - // alias.format(writer, formatter)?; - // } - - // for after in self.after() { - // after.format(writer, formatter)?; - // } - - // let inputs: Vec<_> = self.inputs().collect(); - // if !inputs.is_empty() { - // let open_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::OpenBrace); format_preceding_comments(&open_brace, - // writer, formatter, true)?; // Open braces should ignore the "+1 - // rule" followed by other interrupted // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, true)?; - - // // TODO consider detecting if document is >= v1.2 and forcing the - // optional input // syntax - // if let Some(input_keyword) = self - // .syntax() - // .children_with_tokens() - // .find(|c| c.kind() == SyntaxKind::InputKeyword) - // { - // format_preceding_comments(&input_keyword, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", input_keyword)?; - // format_inline_comment(&input_keyword, writer, formatter, true)?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - // } // else v1.2 syntax - - // if inputs.len() == 1 { - // let input = inputs.first().expect("inputs should have a first - // element"); format_preceding_comments( - // &SyntaxElement::from(input.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // input.format(writer, formatter)?; - // // TODO there may be a trailing comma with comments attached to it - - // let close_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::CloseBrace); format_preceding_comments(& - // close_brace, writer, formatter, true)?; formatter. 
- // space_or_indent(writer)?; write!(writer, "{}", close_brace)?; - // } else { - // // multiple inputs - // let mut commas = self - // .syntax() - // .children_with_tokens() - // .filter(|c| c.kind() == SyntaxKind::Comma); - - // formatter.increment_indent(); - - // for input in inputs { - // if !formatter.interrupted() { - // write!(writer, "{}", NEWLINE)?; - // } else { - // formatter.reset_interrupted(); - // } - // format_preceding_comments( - // &SyntaxElement::from(input.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - // formatter.indent(writer)?; - // input.format(writer, formatter)?; - // if let Some(cur_comma) = commas.next() { - // format_preceding_comments(&cur_comma, writer, formatter, - // true)?; write!(writer, ",")?; - // format_inline_comment(&cur_comma, writer, formatter, true)?; - // } else { - // write!(writer, ",")?; - // } - // } - // if !formatter.interrupted() { - // write!(writer, "{}", NEWLINE)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::CloseBrace); format_preceding_comments(& - // close_brace, writer, formatter, false)?; formatter. - // indent(writer)?; write!(writer, "{}", close_brace)?; - // } - // } - - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for ConditionalStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let if_keyword = first_child_of_kind(self.syntax(), SyntaxKind::IfKeyword); - // formatter.indent(writer)?; - // write!(writer, "{}", if_keyword)?; - // format_inline_comment(&if_keyword, writer, formatter, true)?; - - // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); - // format_preceding_comments(&open_paren, writer, formatter, true)?; - // // Open parens should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_paren)?; - - // let mut paren_on_same_line = true; - // let expr = self.expr(); - // // PERF: This calls `to_string()` which is also called later by `format()` - // // There should be a way to avoid this. 
- // let multiline_expr = expr.syntax().to_string().contains(NEWLINE); - - // format_inline_comment(&open_paren, writer, formatter, !multiline_expr)?; - // if multiline_expr { - // formatter.increment_indent(); - // paren_on_same_line = false; - // } - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // !multiline_expr, - // )?; - // if formatter.interrupted() || multiline_expr { - // formatter.indent(writer)?; - // paren_on_same_line = false; - // } - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // !multiline_expr, - // )?; - // if formatter.interrupted() { - // paren_on_same_line = false; - // } - - // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); - // format_preceding_comments(&close_paren, writer, formatter, !multiline_expr)?; - // if formatter.interrupted() || !paren_on_same_line { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", close_paren)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for stmt in self.statements() { - // stmt.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for ScatterStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let scatter_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::ScatterKeyword); formatter.indent(writer)?; - // write!(writer, "{}", scatter_keyword)?; - // format_inline_comment(&scatter_keyword, writer, formatter, true)?; - - // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); - // format_preceding_comments(&open_paren, writer, formatter, true)?; - // // Open parens should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_paren)?; - // format_inline_comment(&open_paren, writer, formatter, true)?; - - // let ident = self.variable(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let in_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InKeyword); - // format_preceding_comments(&in_keyword, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", in_keyword)?; - // format_inline_comment(&in_keyword, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); - // format_preceding_comments(&close_paren, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", close_paren)?; - // format_inline_comment(&close_paren, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for stmt in self.statements() { - // stmt.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for WorkflowStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - WorkflowStatement::Call(c) => c.format(writer, formatter), - WorkflowStatement::Conditional(c) => c.format(writer, formatter), - WorkflowStatement::Scatter(s) => s.format(writer, formatter), - WorkflowStatement::Declaration(d) => Decl::Bound(d.clone()).format(writer, formatter), - } - } -} - -impl Formattable for WorkflowDefinition { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let workflow_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::WorkflowKeyword); write!(writer, "{}", - // workflow_keyword)?; format_inline_comment(&workflow_keyword, writer, - // formatter, true)?; - - // let name = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // let mut meta_section_str = String::new(); - // let mut parameter_meta_section_str = String::new(); - // let mut input_section_str = String::new(); - // let mut body_str = String::new(); - // let mut output_section_str = String::new(); - // let mut hints_section_str = String::new(); - - // for item in self.items() { - // match item { - // WorkflowItem::Metadata(m) => { - // m.format(&mut meta_section_str, formatter)?; - // } - // WorkflowItem::ParameterMetadata(pm) => { - // pm.format(&mut parameter_meta_section_str, formatter)?; - // } - // WorkflowItem::Input(i) => { - // i.format(&mut input_section_str, formatter)?; - // } - // WorkflowItem::Call(c) => { - // c.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Conditional(c) => { - // c.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Scatter(s) => { - // s.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Declaration(d) => { - // Decl::Bound(d).format(&mut body_str, formatter)?; - // } - // WorkflowItem::Output(o) => { - // o.format(&mut output_section_str, formatter)?; - // } - // WorkflowItem::Hints(h) => { - // h.format(&mut hints_section_str, formatter)?; - // } - // } - // } - - // let mut first_section = true; - // if !meta_section_str.is_empty() { - // first_section = false; - // write!(writer, "{}", meta_section_str)?; - // } - // if !parameter_meta_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", parameter_meta_section_str)?; - // } - // if !input_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", input_section_str)?; - // } - // if !body_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", body_str)?; - // } - // if !output_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", output_section_str)?; - // } - // if !hints_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", hints_section_str)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} diff --git a/backup/wdl-format-old-2/tests/format.rs b/backup/wdl-format-old-2/tests/format.rs deleted file mode 100644 index d7d6dc5d8..000000000 --- a/backup/wdl-format-old-2/tests/format.rs +++ /dev/null @@ -1,192 +0,0 @@ -//! The format file tests. -//! -//! This test looks for directories in `tests/format`. -//! -//! Each directory is expected to contain: -//! -//! * `source.wdl` - the test input source to parse. -//! * `source.formatted` - the expected formatted output. -//! -//! 
The `source.formatted` file may be automatically generated or updated by
-//! setting the `BLESS` environment variable when running this test.
-
-use std::collections::HashSet;
-use std::env;
-use std::ffi::OsStr;
-use std::fs;
-use std::path::Path;
-use std::path::PathBuf;
-use std::process::exit;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
-
-use codespan_reporting::files::SimpleFile;
-use codespan_reporting::term;
-use codespan_reporting::term::termcolor::Buffer;
-use codespan_reporting::term::Config;
-use colored::Colorize;
-use pretty_assertions::StrComparison;
-use rayon::prelude::*;
-use wdl_ast::Diagnostic;
-use wdl_format::format_document;
-
-fn find_tests() -> Vec<PathBuf> {
-    // Check for filter arguments consisting of test names
-    let mut filter = HashSet::new();
-    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
-        if !arg.starts_with('-') {
-            filter.insert(arg);
-        }
-    }
-
-    let mut tests: Vec<PathBuf> = Vec::new();
-    for entry in Path::new("tests/format").read_dir().unwrap() {
-        let entry = entry.expect("failed to read directory");
-        let path = entry.path();
-        if !path.is_dir()
-            || (!filter.is_empty()
-                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
-        {
-            continue;
-        }
-
-        tests.push(path);
-    }
-
-    tests.sort();
-    tests
-}
-
-fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
-    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
-    let mut buffer = Buffer::no_color();
-    for diagnostic in diagnostics {
-        term::emit(
-            &mut buffer,
-            &Config::default(),
-            &file,
-            &diagnostic.to_codespan(),
-        )
-        .expect("should emit");
-    }
-
-    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
-}
-
-fn compare_result(path: &Path, result: &str) -> Result<(), String> {
-    if env::var_os("BLESS").is_some() {
-        fs::write(path, &result).map_err(|e| {
-            format!(
-                "failed to write result file `{path}`: {e}",
-                path = path.display()
-            )
-        })?;
-        return Ok(());
-    }
-
-    let expected = fs::read_to_string(path)
-        .map_err(|e| {
-            format!(
-                "failed to read result file `{path}`: {e}",
-                path = path.display()
-            )
-        })?
-        .replace("\r\n", "\n");
-
-    if expected != result {
-        return Err(format!(
-            "result is not as expected:\n{}",
-            StrComparison::new(&expected, &result),
-        ));
-    }
-
-    Ok(())
-}
-
-fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
-    let path = test.join("source.wdl");
-    let source = std::fs::read_to_string(&path).map_err(|e| {
-        format!(
-            "failed to read source file `{path}`: {e}",
-            path = path.display()
-        )
-    })?;
-
-    let formatted = format_document(&source).map_err(|e| {
-        format!(
-            "failed to format `{path}`: {e}",
-            path = path.display(),
-            e = format_diagnostics(&e, path.as_path(), &source)
-        )
-    })?;
-    compare_result(path.with_extension("formatted.wdl").as_path(), &formatted)?;
-
-    ntests.fetch_add(1, Ordering::SeqCst);
-    Ok(())
-}
-
-fn main() {
-    let tests = find_tests();
-    println!("\nrunning {} tests\n", tests.len());
-
-    let ntests = AtomicUsize::new(0);
-    let errors = tests
-        .par_iter()
-        .filter_map(|test| {
-            let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
-            match std::panic::catch_unwind(|| {
-                match run_test(test, &ntests)
-                    .map_err(|e| format!("failed to run test `{path}`: {e}", path = test.display()))
-                    .err()
-                {
-                    Some(e) => {
-                        println!("test {test_name} ... {failed}", failed = "failed".red());
-                        Some((test_name, e))
-                    }
-                    None => {
-                        println!("test {test_name} ... {ok}", ok = "ok".green());
-                        None
-                    }
-                }
-            }) {
-                Ok(result) => result,
-                Err(e) => {
-                    println!(
-                        "test {test_name} ... {panicked}",
-                        panicked = "panicked".red()
-                    );
-                    Some((
-                        test_name,
-                        format!(
-                            "test panicked: {e:?}",
-                            e = e
-                                .downcast_ref::<String>()
-                                .map(|s| s.as_str())
-                                .or_else(|| e.downcast_ref::<&str>().copied())
-                                .unwrap_or("no panic message")
-                        ),
-                    ))
-                }
-            }
-        })
-        .collect::<Vec<_>>();
-
-    if !errors.is_empty() {
-        eprintln!(
-            "\n{count} test(s) {failed}:",
-            count = errors.len(),
-            failed = "failed".red()
-        );
-
-        for (name, msg) in errors.iter() {
-            eprintln!("{name}: {msg}", msg = msg.red());
-        }
-
-        exit(1);
-    }
-
-    println!(
-        "\ntest result: ok. {} passed\n",
-        ntests.load(Ordering::SeqCst)
-    );
-}
diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
deleted file mode 100644
index d9a98e06c..000000000
--- a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl
-on the date 08-05-2024.
-It was accompanied by the following license:
-
-MIT License
-
-Copyright (c) 2017 ENCODE DCC
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
deleted file mode 100644
index 7c8de0324..000000000
--- a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
+++ /dev/null
@@ -1 +0,0 @@
-version 1.0
diff --git a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
deleted file mode 100644
index 92c09ea84..000000000
--- a/backup/wdl-format-old-2/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
+++ /dev/null
@@ -1,3296 +0,0 @@
-version 1.0
-
-struct RuntimeEnvironment {
-    String docker
-    String singularity
-    String conda
-}
-
-workflow chip {
-    String pipeline_ver = 'v2.2.2'
-
-    meta {
-        version: 'v2.2.2'
-
-        author: 'Jin wook Lee'
-        email: 'leepc12@gmail.com'
-        description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil.'
- organization: 'ENCODE DCC' - - specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing' - - default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2' - default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' - croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json' - - parameter_group: { - runtime_environment: { - title: 'Runtime environment', - description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.' - }, - pipeline_metadata: { - title: 'Pipeline metadata', - description: 'Metadata for a pipeline (e.g. title and description).' - }, - reference_genome: { - title: 'Reference genome', - description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.', - help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.' - }, - input_genomic_data: { - title: 'Input genomic data', - description: 'Genomic input files for experiment.', - help: 'Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].' - }, - input_genomic_data_control: { - title: 'Input genomic data (control)', - description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.', - help: 'Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.' - }, - pipeline_parameter: { - title: 'Pipeline parameter', - description: 'Pipeline type and flags to turn on/off analyses.', - help: 'Use chip.align_only to align FASTQs without peak calling.' 
- }, - alignment: { - title: 'Alignment', - description: 'Parameters for alignment.', - help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.' - }, - peak_calling: { - title: 'Peak calling', - description: 'Parameters for peak calling.', - help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.' - }, - resource_parameter: { - title: 'Resource parameter', - description: 'Number of CPUs (threads), max. memory and walltime for tasks.', - help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.' - } - } - } - input { - # group: runtime_environment - String docker = 'encodedcc/chip-seq-pipeline:v2.2.2' - String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' - String conda = 'encd-chip' - String conda_macs2 = 'encd-chip-macs2' - String conda_spp = 'encd-chip-spp' - - # group: pipeline_metadata - String title = 'Untitled' - String description = 'No description' - - # group: reference_genome - File? genome_tsv - String? genome_name - File? ref_fa - File? bwa_idx_tar - File? bowtie2_idx_tar - File? chrsz - File? blacklist - File? blacklist2 - String? mito_chr_name - String? regex_bfilt_peak_chr_name - String? gensz - File? custom_aligner_idx_tar - - # group: input_genomic_data - Boolean? paired_end - Array[Boolean] paired_ends = [] - Array[File] fastqs_rep1_R1 = [] - Array[File] fastqs_rep1_R2 = [] - Array[File] fastqs_rep2_R1 = [] - Array[File] fastqs_rep2_R2 = [] - Array[File] fastqs_rep3_R1 = [] - Array[File] fastqs_rep3_R2 = [] - Array[File] fastqs_rep4_R1 = [] - Array[File] fastqs_rep4_R2 = [] - Array[File] fastqs_rep5_R1 = [] - Array[File] fastqs_rep5_R2 = [] - Array[File] fastqs_rep6_R1 = [] - Array[File] fastqs_rep6_R2 = [] - Array[File] fastqs_rep7_R1 = [] - Array[File] fastqs_rep7_R2 = [] - Array[File] fastqs_rep8_R1 = [] - Array[File] fastqs_rep8_R2 = [] - Array[File] fastqs_rep9_R1 = [] - Array[File] fastqs_rep9_R2 = [] - Array[File] fastqs_rep10_R1 = [] - Array[File] fastqs_rep10_R2 = [] - Array[File] bams = [] - Array[File] nodup_bams = [] - Array[File] tas = [] - Array[File] peaks = [] - Array[File] peaks_pr1 = [] - Array[File] peaks_pr2 = [] - File? peak_ppr1 - File? peak_ppr2 - File? peak_pooled - - Boolean? 
ctl_paired_end - Array[Boolean] ctl_paired_ends = [] - Array[File] ctl_fastqs_rep1_R1 = [] - Array[File] ctl_fastqs_rep1_R2 = [] - Array[File] ctl_fastqs_rep2_R1 = [] - Array[File] ctl_fastqs_rep2_R2 = [] - Array[File] ctl_fastqs_rep3_R1 = [] - Array[File] ctl_fastqs_rep3_R2 = [] - Array[File] ctl_fastqs_rep4_R1 = [] - Array[File] ctl_fastqs_rep4_R2 = [] - Array[File] ctl_fastqs_rep5_R1 = [] - Array[File] ctl_fastqs_rep5_R2 = [] - Array[File] ctl_fastqs_rep6_R1 = [] - Array[File] ctl_fastqs_rep6_R2 = [] - Array[File] ctl_fastqs_rep7_R1 = [] - Array[File] ctl_fastqs_rep7_R2 = [] - Array[File] ctl_fastqs_rep8_R1 = [] - Array[File] ctl_fastqs_rep8_R2 = [] - Array[File] ctl_fastqs_rep9_R1 = [] - Array[File] ctl_fastqs_rep9_R2 = [] - Array[File] ctl_fastqs_rep10_R1 = [] - Array[File] ctl_fastqs_rep10_R2 = [] - Array[File] ctl_bams = [] - Array[File] ctl_nodup_bams = [] - Array[File] ctl_tas = [] - - # group: pipeline_parameter - String pipeline_type - Boolean align_only = false - Boolean redact_nodup_bam = false - Boolean true_rep_only = false - Boolean enable_count_signal_track = false - Boolean enable_jsd = true - Boolean enable_gc_bias = true - - # group: alignment - String aligner = 'bowtie2' - File? custom_align_py - Boolean use_bwa_mem_for_pe = false - Int bwa_mem_read_len_limit = 70 - Boolean use_bowtie2_local_mode = false - Int crop_length = 0 - Int crop_length_tol = 2 - String trimmomatic_phred_score_format = 'auto' - Int xcor_trim_bp = 50 - Boolean use_filt_pe_ta_for_xcor = false - String dup_marker = 'picard' - Boolean no_dup_removal = false - Int mapq_thresh = 30 - Array[String] filter_chrs = [] - Int subsample_reads = 0 - Int ctl_subsample_reads = 0 - Int xcor_subsample_reads = 15000000 - Int xcor_exclusion_range_min = -500 - Int? xcor_exclusion_range_max - Int pseudoreplication_random_seed = 0 - - # group: peak_calling - Int ctl_depth_limit = 200000000 - Float exp_ctl_depth_ratio_limit = 5.0 - Array[Int?] fraglen = [] - String? peak_caller - Boolean always_use_pooled_ctl = true - Float ctl_depth_ratio = 1.2 - Int? cap_num_peak - Float pval_thresh = 0.01 - Float fdr_thresh = 0.01 - Float idr_thresh = 0.05 - - # group: resource_parameter - Int align_cpu = 6 - Float align_bowtie2_mem_factor = 0.15 - Float align_bwa_mem_factor = 1.0 - Int align_time_hr = 48 - Float align_bowtie2_disk_factor = 8.0 - Float align_bwa_disk_factor = 8.0 - - Int filter_cpu = 4 - Float filter_mem_factor = 0.4 - Int filter_time_hr = 24 - Float filter_disk_factor = 8.0 - - Int bam2ta_cpu = 2 - Float bam2ta_mem_factor = 0.35 - Int bam2ta_time_hr = 6 - Float bam2ta_disk_factor = 4.0 - - Float spr_mem_factor = 20.0 - Float spr_disk_factor = 30.0 - - Int jsd_cpu = 4 - Float jsd_mem_factor = 0.1 - Int jsd_time_hr = 6 - Float jsd_disk_factor = 2.0 - - Int xcor_cpu = 2 - Float xcor_mem_factor = 1.0 - Int xcor_time_hr = 24 - Float xcor_disk_factor = 4.5 - - Float subsample_ctl_mem_factor = 22.0 - Float subsample_ctl_disk_factor = 15.0 - - Float macs2_signal_track_mem_factor = 12.0 - Int macs2_signal_track_time_hr = 24 - Float macs2_signal_track_disk_factor = 80.0 - - Int call_peak_cpu = 6 - Float call_peak_spp_mem_factor = 5.0 - Float call_peak_macs2_mem_factor = 5.0 - Int call_peak_time_hr = 72 - Float call_peak_spp_disk_factor = 5.0 - Float call_peak_macs2_disk_factor = 30.0 - - String? align_trimmomatic_java_heap - String? filter_picard_java_heap - String? 
-    }
-
-    parameter_meta {
-        docker: {
-            description: 'Default Docker image URI to run WDL tasks.',
-            group: 'runtime_environment',
-            example: 'ubuntu:20.04'
-        }
-        singularity: {
-            description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.',
-            group: 'runtime_environment',
-            example: 'docker://ubuntu:20.04'
-        }
-        conda: {
-            description: 'Default Conda environment name to run WDL tasks. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip'
-        }
-        conda_macs2: {
-            description: 'Conda environment name for task macs2. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip-macs2'
-        }
-        conda_spp: {
-            description: 'Conda environment name for tasks spp/xcor. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip-spp'
-        }
-        title: {
-            description: 'Experiment title.',
-            group: 'pipeline_metadata',
-            example: 'ENCSR936XTK (subsampled 1/50)'
-        }
-        description: {
-            description: 'Experiment description.',
-            group: 'pipeline_metadata',
-            example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)'
-        }
-        genome_tsv: {
-            description: 'Reference genome database TSV.',
-            group: 'reference_genome',
-            help: 'This TSV file includes all genome-specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameter in it. Parameters defined in input JSON will override those defined in genome TSV.',
-            example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv'
-        }
-        genome_name: {
-            description: 'Genome name.',
-            group: 'reference_genome'
-        }
-        ref_fa: {
-            description: 'Reference FASTA file.',
-            group: 'reference_genome'
-        }
-        bowtie2_idx_tar: {
-            description: 'Bowtie2 index TAR file.',
-            group: 'reference_genome'
-        }
-        custom_aligner_idx_tar: {
-            description: 'Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.',
-            group: 'reference_genome'
-        }
-        chrsz: {
-            description: '2-col chromosome sizes file.',
-            group: 'reference_genome'
-        }
-        blacklist: {
-            description: 'Blacklist file in BED format.',
-            group: 'reference_genome',
-            help: 'Peaks will be filtered with this file.'
-        }
-        blacklist2: {
-            description: 'Secondary blacklist file in BED format.',
-            group: 'reference_genome',
-            help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with the merged blacklist.'
-        }
-        mito_chr_name: {
-            description: 'Mitochondrial chromosome name.',
-            group: 'reference_genome',
-            help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.'
-        }
-        regex_bfilt_peak_chr_name: {
-            description: 'Reg-ex for chromosomes to keep while filtering peaks.',
-            group: 'reference_genome',
-            help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in the .bfilt. peak file. This is done along with blacklist filtering of the peak file.'
-        }
-        gensz: {
-            description: 'Genome sizes. "hs" for human, "mm" for mouse or the sum of the 2nd column in the chromosome sizes file.',
-            group: 'reference_genome'
-        }
-        paired_end: {
-            description: 'Sequencing endedness.',
-            group: 'input_genomic_data',
-            help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.',
-            example: true
-        }
-        paired_ends: {
-            description: 'Sequencing endedness array (for mixed SE/PE datasets).',
-            group: 'input_genomic_data',
-            help: 'Whether each biological replicate is paired ended or not.'
-        }
-        fastqs_rep1_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type, fill parameters for that type and leave the others undefined. Especially for FASTQs, we have an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep1_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep2_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep2_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep3_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 3.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep3_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 3.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep4_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 4.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep4_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 4.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep5_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 5.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
-        }
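-        # Illustrative example: technical replicates are merged per biological
-        # replicate, e.g. fastqs_rep1_R1 = ['rep1_lane1_R1.fastq.gz',
-        # 'rep1_lane2_R1.fastq.gz'] (hypothetical file names) becomes one merged
-        # read1 FASTQ for replicate 1.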
-        fastqs_rep5_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 5.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep6_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 6.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep6_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 6.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep7_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 7.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep7_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 7.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep8_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 8.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep8_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 8.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep9_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 9.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep9_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 9.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep10_R1: {
-            description: 'Read1 FASTQs to be merged for biological replicate 10.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep10_R2: {
-            description: 'Read2 FASTQs to be merged for biological replicate 10.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        bams: {
-            description: 'List of unfiltered/raw BAM files for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].'
-        }
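-        # Illustrative example: to start the pipeline from BAMs for two biological
-        # replicates, set bams = ['rep1.bam', 'rep2.bam'] (hypothetical names) and
-        # leave all FASTQ inputs empty.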
-        nodup_bams: {
-            description: 'List of filtered/deduped BAM files for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
-        }
-        tas: {
-            description: 'List of TAG-ALIGN files for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
-        }
-        peaks: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.'
-        }
-        peaks_pr1: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
-        }
-        peaks_pr2: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
-        }
-        peak_pooled: {
-            description: 'NARROWPEAK file for pooled true replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
-        }
-        peak_ppr1: {
-            description: 'NARROWPEAK file for pooled pseudo replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
-        }
-        peak_ppr2: {
-            description: 'NARROWPEAK file for pooled pseudo replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
-        }
-
-        ctl_paired_end: {
-            description: 'Sequencing endedness for all controls.',
-            group: 'input_genomic_data_control',
-            help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.'
-        }
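-        # Illustrative example: for a paired-end ctl-rep1 and a single-end
-        # ctl-rep2, leave ctl_paired_end undefined and set
-        # ctl_paired_ends = [true, false].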
-        ctl_paired_ends: {
-            description: 'Sequencing endedness array for mixed SE/PE controls.',
-            group: 'input_genomic_data_control',
-            help: 'Whether each control replicate is paired ended or not.'
-        }
-        ctl_fastqs_rep1_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 1.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type, fill parameters for that type and leave the others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep1_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 1.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep2_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 2.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep2_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 2.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep3_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 3.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep3_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 3.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep4_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 4.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep4_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 4.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep5_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 5.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep5_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 5.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep6_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 6.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep6_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 6.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep7_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 7.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep7_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 7.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep8_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 8.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep8_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 8.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep9_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 9.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep9_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 9.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep10_R1: {
-            description: 'Read1 FASTQs to be merged for control replicate 10.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep10_R2: {
-            description: 'Read2 FASTQs to be merged for control replicate 10.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_bams: {
-            description: 'List of unfiltered/raw BAM files for each control replicate.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].'
-        }
-        ctl_nodup_bams: {
-            description: 'List of filtered/deduped BAM files for each control replicate.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].'
-        }
-        ctl_tas: {
-            description: 'List of TAG-ALIGN files for each control replicate.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].'
-        }
-
-        pipeline_type: {
-            description: 'Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.',
-            group: 'pipeline_parameter',
-            help: 'Default peak caller is different for each type: spp for TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.',
-            choices: ['tf', 'histone', 'control'],
-            example: 'tf'
-        }
-        redact_nodup_bam: {
-            description: 'Redact filtered/nodup BAM.',
-            group: 'pipeline_parameter',
-            help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.'
-        }
-        align_only: {
-            description: 'Align only mode.',
-            group: 'pipeline_parameter',
-            help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.'
-        }
-        true_rep_only: {
-            description: 'Disables all analyses related to pseudo-replicates.',
-            group: 'pipeline_parameter',
-            help: 'Pipeline generates two pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
-        }
-        enable_count_signal_track: {
-            description: 'Enables generation of count signal tracks.',
-            group: 'pipeline_parameter'
-        }
-        enable_jsd: {
-            description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
-            group: 'pipeline_parameter'
-        }
-        enable_gc_bias: {
-            description: 'Enables GC bias calculation.',
-            group: 'pipeline_parameter'
-        }
-
-        aligner: {
-            description: 'Aligner: bowtie2, bwa or custom.',
-            group: 'alignment',
-            help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
-            choices: ['bowtie2', 'bwa', 'custom'],
-            example: 'bowtie2'
-        }
-        custom_align_py: {
-            description: 'Python script for a custom aligner.',
-            group: 'alignment',
-            help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
-        }
-        use_bwa_mem_for_pe: {
-            description: 'For paired-end datasets with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
-            group: 'alignment',
-            help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
-        }
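-        # Illustrative example: pipeline_type = 'tf' defaults the peak caller to
-        # spp (controls required), 'histone' defaults it to macs2, and 'control'
-        # turns on align-only mode and disables cross-correlation analysis.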
-        bwa_mem_read_len_limit: {
-            description: 'Read length limit for bwa mem (for PE FASTQs only).',
-            group: 'alignment',
-            help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
-        }
-        use_bowtie2_local_mode: {
-            description: 'Use bowtie2\'s local mode (soft-clipping).',
-            group: 'alignment',
-            help: 'This will add --local to the bowtie2 command line so that it will replace the default end-to-end mode.'
-        }
-        crop_length: {
-            description: 'Crop FASTQs\' reads longer than this length.',
-            group: 'alignment',
-            help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.'
-        }
-        crop_length_tol: {
-            description: 'Tolerance for cropping reads in FASTQs.',
-            group: 'alignment',
-            help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.'
-        }
-        trimmomatic_phred_score_format: {
-            description: 'Base encoding (format) for Phred score in FASTQs.',
-            group: 'alignment',
-            choices: ['auto', 'phred33', 'phred64'],
-            help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".'
-        }
-        xcor_trim_bp: {
-            description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'This does not affect alignment of experimental/control replicates. Pipeline additionally aligns R1 FASTQ for cross-correlation analysis only. This parameter is used for it.'
-        }
-        use_filt_pe_ta_for_xcor: {
-            description: 'Use filtered PE BAM for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.'
-        }
-        dup_marker: {
-            description: 'Marker for duplicate reads. picard or sambamba.',
-            group: 'alignment',
-            help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.',
-            choices: ['picard', 'sambamba'],
-            example: 'picard'
-        }
-        no_dup_removal: {
-            description: 'Disable removal of duplicate reads during filtering BAM.',
-            group: 'alignment',
-            help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have the .nodup. suffix in its filename.'
-        }
-        mapq_thresh: {
-            description: 'Threshold for low MAPQ reads removal.',
-            group: 'alignment',
-            help: 'Low MAPQ reads are filtered out while filtering BAM.'
-        }
-        filter_chrs: {
-            description: 'List of chromosomes to be filtered out while filtering BAM.',
-            group: 'alignment',
-            help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact words for chromosome names.'
-        }
-        subsample_reads: {
-            description: 'Subsample reads. Shuffle and subsample reads.',
-            group: 'alignment',
-            help: 'This affects all downstream analyses after filtering experiment BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in BAM exceeds this number. 0 means disabled.'
-        }
-        ctl_subsample_reads: {
-            description: 'Subsample control reads. Shuffle and subsample control reads.',
-            group: 'alignment',
-            help: 'This affects all downstream analyses after filtering control BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if the actual number of reads in BAM exceeds this number. 0 means disabled.'
-        }
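-        # Illustrative example: with crop_length = 50 and crop_length_tol = 2,
-        # reads are cropped to 50 bp and reads shorter than 50 - 2 = 48 bp are
-        # dropped.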
-        xcor_subsample_reads: {
-            description: 'Subsample reads for cross-correlation analysis only.',
-            group: 'alignment',
-            help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
-        }
-        xcor_exclusion_range_min: {
-            description: 'Exclusion minimum for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.'
-        }
-        xcor_exclusion_range_max: {
-            description: 'Exclusion maximum for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'For run_spp.R -s. If not defined, default values of `max(read_len + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.'
-        }
-        pseudoreplication_random_seed: {
-            description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).',
-            group: 'alignment',
-            help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
-        }
-        ctl_depth_limit: {
-            description: 'Hard limit for chosen control\'s depth.',
-            group: 'peak_calling',
-            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.'
-        }
-        exp_ctl_depth_ratio_limit: {
-            description: 'Second limit for chosen control\'s depth.',
-            group: 'peak_calling',
-            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than the experiment replicate\'s read depth multiplied by this factor, then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.'
-        }
-        fraglen: {
-            description: 'Fragment length for each biological replicate.',
-            group: 'peak_calling',
-            help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.'
-        }
-        peak_caller: {
-            description: 'Peak caller.',
-            group: 'peak_calling',
-            help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-Seq even though spp is the default for TF ChIP-Seq (chip.pipeline_type == tf).',
-            example: 'spp'
-        }
-        always_use_pooled_ctl: {
-            description: 'Always choose a pooled control for each experiment replicate.',
-            group: 'peak_calling',
-            help: 'If turned on, ignores chip.ctl_depth_ratio.'
-        }
-        ctl_depth_ratio: {
-            description: 'Maximum depth ratio between control replicates.',
-            group: 'peak_calling',
-            help: 'If the ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.'
-        }
-
-        cap_num_peak: {
-            description: 'Upper limit on the number of peaks.',
-            group: 'peak_calling',
-            help: 'It is 30000000 and 50000000 by default for spp and macs2, respectively.'
-        }
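-        # Illustrative example: with ctl_depth_limit = 200000000, a chosen control
-        # with 250M reads is subsampled; with exp_ctl_depth_ratio_limit = 5.0 and a
-        # 30M-read experiment replicate, a control deeper than 5.0 * 30M = 150M
-        # reads is also subsampled.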
-        pval_thresh: {
-            description: 'p-value threshold for MACS2 peak caller.',
-            group: 'peak_calling',
-            help: 'macs2 callpeak -p'
-        }
-        fdr_thresh: {
-            description: 'FDR threshold for spp peak caller (phantompeakqualtools).',
-            group: 'peak_calling',
-            help: 'run_spp.R -fdr='
-        }
-        idr_thresh: {
-            description: 'IDR threshold.',
-            group: 'peak_calling'
-        }
-
-        align_cpu: {
-            description: 'Number of cores for task align.',
-            group: 'resource_parameter',
-            help: 'Task align merges/crops/maps FASTQs.'
-        }
-        align_bowtie2_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        align_bwa_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task align with bwa as aligner.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        align_time_hr: {
-            description: 'Walltime (h) required for task align.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        align_bowtie2_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of instance on GCP/AWS.'
-        }
-        align_bwa_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of FASTQs to determine required disk size of instance on GCP/AWS.'
-        }
-        filter_cpu: {
-            description: 'Number of cores for task filter.',
-            group: 'resource_parameter',
-            help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.'
-        }
-        filter_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task filter.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        filter_time_hr: {
-            description: 'Walltime (h) required for task filter.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        filter_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task filter.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of BAMs to determine required disk size of instance on GCP/AWS.'
-        }
-        bam2ta_cpu: {
-            description: 'Number of cores for task bam2ta.',
-            group: 'resource_parameter',
-            help: 'Task bam2ta converts filtered/deduped BAM into TAG-ALIGN (6-col BED) format.'
-        }
-        bam2ta_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task bam2ta.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        bam2ta_time_hr: {
-            description: 'Walltime (h) required for task bam2ta.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        bam2ta_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task bam2ta.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
-        }
-        spr_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task spr.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        spr_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task spr.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
-        }
-        jsd_cpu: {
-            description: 'Number of cores for task jsd.',
-            group: 'resource_parameter',
-            help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.'
-        }
-        jsd_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task jsd.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        jsd_time_hr: {
-            description: 'Walltime (h) required for task jsd.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        jsd_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task jsd.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
-        }
-        xcor_cpu: {
-            description: 'Number of cores for task xcor.',
-            group: 'resource_parameter',
-            help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
-        }
-        xcor_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task xcor.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        xcor_time_hr: {
-            description: 'Walltime (h) required for task xcor.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        xcor_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task xcor.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
-        }
-        subsample_ctl_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task subsample_ctl.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        subsample_ctl_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
-        }
-        call_peak_cpu: {
-            description: 'Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.',
-            group: 'resource_parameter',
-            help: 'Task call_peak calls peaks on TAG-ALIGNs using the SPP/MACS2 peak caller. MACS2 is single-threaded, so cpu will be fixed at 2 for MACS2.'
-        }
-        call_peak_spp_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        call_peak_macs2_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        call_peak_time_hr: {
-            description: 'Walltime (h) required for task call_peak.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        call_peak_spp_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
-        }
-        call_peak_macs2_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
-        }
-        macs2_signal_track_mem_factor: {
-            description: 'Multiplication factor to determine memory required for task macs2_signal_track.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
-        }
-        macs2_signal_track_time_hr: {
-            description: 'Walltime (h) required for task macs2_signal_track.',
-            group: 'resource_parameter',
-            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
-        }
-        macs2_signal_track_disk_factor: {
-            description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.',
-            group: 'resource_parameter',
-            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
-        }
-        align_trimmomatic_java_heap: {
-            description: 'Maximum Java heap (java -Xmx) in task align.',
-            group: 'resource_parameter',
-            help: 'Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.'
-        }
-        filter_picard_java_heap: {
-            description: 'Maximum Java heap (java -Xmx) in task filter.',
-            group: 'resource_parameter',
-            help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.'
-        }
-        gc_bias_picard_java_heap: {
-            description: 'Maximum Java heap (java -Xmx) in task gc_bias.',
-            group: 'resource_parameter',
-            help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.'
-        }
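-        # Illustrative example: if task filter is given 16 GB of memory and
-        # filter_picard_java_heap is undefined, Picard MarkDuplicates runs with
-        # about 90% of it, i.e. roughly java -Xmx14g.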
-    }
-
-    RuntimeEnvironment runtime_environment = {
-        'docker': docker, 'singularity': singularity, 'conda': conda
-    }
-    RuntimeEnvironment runtime_environment_spp = {
-        'docker': docker, 'singularity': singularity, 'conda': conda_spp
-    }
-    RuntimeEnvironment runtime_environment_macs2 = {
-        'docker': docker, 'singularity': singularity, 'conda': conda_macs2
-    }
-
-    # read genome data and paths
-    if ( defined(genome_tsv) ) {
-        call read_genome_tsv { input:
-            genome_tsv = genome_tsv,
-            runtime_environment = runtime_environment
-        }
-    }
-    File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa])
-    File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar
-        else read_genome_tsv.bwa_idx_tar
-    File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar])
-    File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz])
-    String gensz_ = select_first([gensz, read_genome_tsv.gensz])
-    File? blacklist1_ = if defined(blacklist) then blacklist
-        else read_genome_tsv.blacklist
-    File? blacklist2_ = if defined(blacklist2) then blacklist2
-        else read_genome_tsv.blacklist2
-    # merge multiple blacklists
-    # two blacklists can have different number of columns (3 vs 6)
-    # so we limit merged blacklist's columns to 3
-    Array[File] blacklists = select_all([blacklist1_, blacklist2_])
-    if ( length(blacklists) > 1 ) {
-        call pool_ta as pool_blacklist { input:
-            tas = blacklists,
-            col = 3,
-            runtime_environment = runtime_environment
-        }
-    }
-    File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled
-        else if length(blacklists) > 0 then blacklists[0]
-        else blacklist2_
-    String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name])
-    String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name])
-    String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)])
-
-    ### temp vars (do not define these)
-    String aligner_ = if defined(custom_align_py) then 'custom' else aligner
-    String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp'])
-        else select_first([peak_caller, 'macs2'])
-    String peak_type_ = if peak_caller_=='spp' then 'regionPeak'
-        else 'narrowPeak'
-    Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only
-    String idr_rank_ = if peak_caller_=='spp' then 'signal.value'
-        else if peak_caller_=='macs2' then 'p.value'
-        else 'p.value'
-    Int cap_num_peak_spp = 300000
-    Int cap_num_peak_macs2 = 500000
-    Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp])
-        else select_first([cap_num_peak, cap_num_peak_macs2])
-    Int mapq_thresh_ = mapq_thresh
-    Boolean enable_xcor_ = if pipeline_type=='control' then false else true
-    Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track
-    Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd
-    Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias
-    Boolean align_only_ = if pipeline_type=='control' then true else align_only
-
-    Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor
-        else align_bwa_mem_factor
-    Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor
-        else align_bwa_disk_factor
-    Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor
-        else call_peak_macs2_mem_factor
-    Float call_peak_disk_factor_ = if peak_caller_ =='spp' then call_peak_spp_disk_factor
-        else call_peak_macs2_disk_factor
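-    # Illustrative example: with pipeline_type == 'tf' and no peak_caller
-    # override, peak_caller_ == 'spp', peak_type_ == 'regionPeak' and
-    # cap_num_peak_ == cap_num_peak_spp (300000).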
-
-    # temporary 2-dim fastqs array [rep_id][merge_id]
-    Array[Array[File]] fastqs_R1 =
-        if length(fastqs_rep10_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
-            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1]
-        else if length(fastqs_rep9_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
-            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1]
-        else if length(fastqs_rep8_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
-            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1]
-        else if length(fastqs_rep7_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
-            fastqs_rep6_R1, fastqs_rep7_R1]
-        else if length(fastqs_rep6_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
-            fastqs_rep6_R1]
-        else if length(fastqs_rep5_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1]
-        else if length(fastqs_rep4_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1]
-        else if length(fastqs_rep3_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1]
-        else if length(fastqs_rep2_R1)>0 then
-            [fastqs_rep1_R1, fastqs_rep2_R1]
-        else if length(fastqs_rep1_R1)>0 then
-            [fastqs_rep1_R1]
-        else []
-    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
-    Array[Array[File]] fastqs_R2 =
-        [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2,
-        fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2]
-
-    # temporary 2-dim ctl fastqs array [rep_id][merge_id]
-    Array[Array[File]] ctl_fastqs_R1 =
-        if length(ctl_fastqs_rep10_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
-            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1]
-        else if length(ctl_fastqs_rep9_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
-            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1]
-        else if length(ctl_fastqs_rep8_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
-            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1]
-        else if length(ctl_fastqs_rep7_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
-            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1]
-        else if length(ctl_fastqs_rep6_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
-            ctl_fastqs_rep6_R1]
-        else if length(ctl_fastqs_rep5_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1]
-        else if length(ctl_fastqs_rep4_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1]
-        else if length(ctl_fastqs_rep3_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1]
-        else if length(ctl_fastqs_rep2_R1)>0 then
-            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1]
-        else if length(ctl_fastqs_rep1_R1)>0 then
-            [ctl_fastqs_rep1_R1]
-        else []
-    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
-    Array[Array[File]] ctl_fastqs_R2 =
-        [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2,
-        ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2]
-
-    # temporary variables to get number of replicates
-    #   WDLic implementation of max(A,B,C,...)
-    Int num_rep_fastq = length(fastqs_R1)
-    Int num_rep_bam = if length(bams)<num_rep_fastq then num_rep_fastq
-        else length(bams)
-    Int num_rep_nodup_bam = if length(nodup_bams)<num_rep_bam then num_rep_bam
-        else length(nodup_bams)
-    Int num_rep_ta = if length(tas)<num_rep_nodup_bam then num_rep_nodup_bam
-        else length(tas)
-    Int num_rep_peak = if length(peaks)<num_rep_ta then num_rep_ta
-        else length(peaks)
-    Int num_rep = num_rep_peak
-
-    # temporary variables to get number of controls
-    Int num_ctl_fastq = length(ctl_fastqs_R1)
-    Int num_ctl_bam = if length(ctl_bams)<num_ctl_fastq then num_ctl_fastq
-        else length(ctl_bams)
-    Int num_ctl_nodup_bam = if length(ctl_nodup_bams)<num_ctl_bam then num_ctl_bam
-        else length(ctl_nodup_bams)
-    Int num_ctl_ta = if length(ctl_tas)<num_ctl_nodup_bam then num_ctl_nodup_bam
-        else length(ctl_tas)
-    Int num_ctl = num_ctl_ta
-
-    if ( ( num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) {
-        call raise_exception as error_wrong_aligner { input:
-            msg = 'Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.',
-            runtime_environment = runtime_environment
-        }
-    }
-    if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) {
-        call raise_exception as error_use_bwa_mem_for_non_bwa { input:
-            msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.',
-            runtime_environment = runtime_environment
-        }
-    }
-    if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) {
-        call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
-            msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.',
-            runtime_environment = runtime_environment
-        }
-    }
-    if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) {
-        call raise_exception as error_custom_aligner { input:
-            msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
-            runtime_environment = runtime_environment
-        }
-    }
-
-    if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) {
-        call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input:
-            msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_ratio_limit">0) for ' +
-                'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' +
-                'Automatic control subsampling is enabled by default. ' +
-                'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' +
-                'You can still use manual control subsampling ("chip.ctl_subsample_reads">0) since it is done ' +
-                'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ',
-            runtime_environment = runtime_environment
-        }
-    }
-    if ( pipeline_type == 'control' && num_ctl > 0 ) {
-        call raise_exception as error_ctl_input_defined_in_control_mode { input:
-            msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.',
-            runtime_environment = runtime_environment
-        }
-    }
-    if ( pipeline_type == 'control' && num_rep_fastq == 0 ) {
-        call raise_exception as error_ctl_fastq_input_required_for_control_mode { input:
-            msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. Pipeline will recognize them as control FASTQs.',
-            runtime_environment = runtime_environment
-        }
-    }
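-    # Illustrative example: num_rep is effectively the max over per-input
-    # replicate counts, e.g. two FASTQ replicates and an empty bams array give
-    # num_rep == 2.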
-
-    # align each replicate
-    scatter(i in range(num_rep)) {
-        # to override endedness definition for individual replicate
-        # paired_end will override paired_ends[i]
-        Boolean paired_end_ = if !defined(paired_end) && i0
-        Boolean has_output_of_align = i0
-        Boolean has_output_of_align_ctl = i1 ) {
-        # pool tagaligns from true replicates
-        call pool_ta { input :
-            tas = ta_,
-            prefix = 'rep',
-            runtime_environment = runtime_environment
-        }
-    }
-
-    # if there are pr1 TAs for ALL replicates then pool them
-    Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep
-    if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) {
-        # pool tagaligns from pseudo replicate 1
-        call pool_ta as pool_ta_pr1 { input :
-            tas = spr.ta_pr1,
-            prefix = 'rep-pr1',
-            runtime_environment = runtime_environment
-        }
-    }
-
-    # if there are pr2 TAs for ALL replicates then pool them
-    Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep
-    if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) {
-        # pool tagaligns from pseudo replicate 2
-        call pool_ta as pool_ta_pr2 { input :
-            tas = spr.ta_pr2,
-            prefix = 'rep-pr2',
-            runtime_environment = runtime_environment
-        }
-    }
-
-    # if there are CTL TAs for ALL replicates then pool them
-    Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl
-    if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) {
-        # pool tagaligns from true replicates
-        call pool_ta as pool_ta_ctl { input :
-            tas = ctl_ta_,
-            prefix = 'ctl',
-            runtime_environment = runtime_environment
-        }
-    }
-
-    Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled)
-    if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) {
-        call count_signal_track as count_signal_track_pooled { input :
-            ta = pool_ta.ta_pooled,
-            chrsz = chrsz_,
-            runtime_environment = runtime_environment
-        }
-    }
-
-    Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep
-    if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) {
-        # fingerprint and JS-distance plot
-        call jsd { input :
-            nodup_bams = nodup_bam_,
-            ctl_bams = ctl_nodup_bam_, # use first control only
-            blacklist = blacklist_,
-            mapq_thresh = mapq_thresh_,
-
-            cpu = jsd_cpu,
-            mem_factor = jsd_mem_factor,
-            time_hr = jsd_time_hr,
-            disk_factor = jsd_disk_factor,
-            runtime_environment = runtime_environment
-        }
-    }
-
-    Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep
-        && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0
-    if ( has_all_input_of_choose_ctl && !align_only_ ) {
-        # choose appropriate control for each exp IP replicate
-        # outputs:
-        #   choose_ctl.idx : control replicate index for each exp replicate
-        #       -1 means pooled ctl replicate
-        call choose_ctl { input:
-            tas = ta_,
-            ctl_tas = ctl_ta_,
-            ta_pooled = pool_ta.ta_pooled,
-            ctl_ta_pooled = pool_ta_ctl.ta_pooled,
-            always_use_pooled_ctl = always_use_pooled_ctl,
-            ctl_depth_ratio = ctl_depth_ratio,
-            ctl_depth_limit = ctl_depth_limit,
-            exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit,
-            runtime_environment = runtime_environment
-        }
-    }
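-    # Illustrative example: with 2 experiment replicates, 2 controls and
-    # always_use_pooled_ctl == true, choose_ctl picks the pooled control for
-    # every experiment replicate (index -1 in its outputs).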
-
-    scatter(i in range(num_rep)) {
-        # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]]
-        # chosen_ctl_ta_id
-        #   >=0: control TA index (this means that control TA with this index exists)
-        #   -1: use pooled control
-        #   -2: there is no control
-        Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then
-            select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2
-        Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then
-            select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0
-        Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false
-            else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0]
-            else ctl_paired_end_[chosen_ctl_ta_id]
-
-        if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) {
-            call subsample_ctl { input:
-                ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled
-                    else ctl_ta_[ chosen_ctl_ta_id ],
-                subsample = chosen_ctl_ta_subsample,
-                paired_end = chosen_ctl_paired_end,
-                mem_factor = subsample_ctl_mem_factor,
-                disk_factor = subsample_ctl_disk_factor,
-                runtime_environment = runtime_environment
-            }
-        }
-        Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then []
-            else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ]
-            else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ]
-            else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ]
-    }
-    Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then
-        select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0
-
-    # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int]))
-    Array[Int] fraglen_tmp = select_all(fraglen_)
-
-    # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks
-    scatter(i in range(num_rep)) {
-        Boolean has_input_of_call_peak = defined(ta_[i])
-        Boolean has_output_of_call_peak = i 1 ) {
-    # rounded mean of fragment length, which will be used for
-    # 1) calling peaks for pooled true/pseudo replicates
-    # 2) calculating FRiP
-    call rounded_mean as fraglen_mean { input :
-        ints = fraglen_tmp,
-        runtime_environment = runtime_environment
-    }
-    # }
-
-    if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) {
-        call subsample_ctl as subsample_ctl_pooled { input:
-            ta = if num_ctl < 2 then ctl_ta_[0]
-                else pool_ta_ctl.ta_pooled,
-            subsample = chosen_ctl_ta_pooled_subsample,
-            paired_end = ctl_paired_end_[0],
-            mem_factor = subsample_ctl_mem_factor,
-            disk_factor = subsample_ctl_disk_factor,
-            runtime_environment = runtime_environment
-        }
-    }
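-    # Illustrative example: fraglen_tmp = [220, 230] gives
-    # fraglen_mean.rounded_mean == 225, which is reused for pooled peak calling
-    # and FRiP calculation.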
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] - else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ] - else if num_ctl < 2 then [ ctl_ta_[0] ] - else [ pool_ta_ctl.ta_pooled ] - - Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) - Boolean has_output_of_call_peak_pooled = defined(peak_pooled) - if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) { - # call peaks on pooled replicate - # always call peaks for pooled replicate to get signal tracks - call call_peak as call_peak_pooled { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled - else call_peak_pooled.peak - - # macs2 signal track for pooled rep - if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) { - call macs2_signal_track as macs2_signal_track_pooled { input : - tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - pval_thresh = pval_thresh, - fraglen = fraglen_mean.rounded_mean, - - mem_factor = macs2_signal_track_mem_factor, - disk_factor = macs2_signal_track_disk_factor, - time_hr = macs2_signal_track_time_hr, - runtime_environment = runtime_environment_macs2 - } - } - - Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) - Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) - if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) { - # call peaks on 1st pooled pseudo replicates - call call_peak as call_peak_ppr1 { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? 
peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 - else call_peak_ppr1.peak - - Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) - Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) - if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) { - # call peaks on 2nd pooled pseudo replicates - call call_peak as call_peak_ppr2 { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 - else call_peak_ppr2.peak - - # do IDR/overlap on all pairs of two replicates (i,j) - # where i and j are zero-based indices and 0 <= i < j < num_rep - scatter( pair in cross(range(num_rep),range(num_rep)) ) { - # pair.left = 0-based index of 1st replicate - # pair.right = 0-based index of 2nd replicate - File? peak1_ = peak_[pair.left] - File? peak2_ = peak_[pair.right] - if ( !align_only_ && pair.left 1 ) { - # Naive overlap on pooled pseudo replicates - call overlap as overlap_ppr { input : - prefix = 'pooled-pr1_vs_pooled-pr2', - peak1 = peak_ppr1_, - peak2 = peak_ppr2_, - peak_pooled = peak_pooled_, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - - if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) { - # IDR on pooled pseduo replicates - call idr as idr_ppr { input : - prefix = 'pooled-pr1_vs_pooled-pr2', - peak1 = peak_ppr1_, - peak2 = peak_ppr2_, - peak_pooled = peak_pooled_, - idr_thresh = idr_thresh, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - rank = idr_rank_, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - - # reproducibility QC for overlap/IDR peaks - if ( !align_only_ && !true_rep_only && num_rep > 0 ) { - # reproducibility QC for overlapping peaks - call reproducibility as reproducibility_overlap { input : - prefix = 'overlap', - peaks = select_all(overlap.bfilt_overlap_peak), - peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [], - peak_ppr = overlap_ppr.bfilt_overlap_peak, - peak_type = peak_type_, - chrsz = chrsz_, - runtime_environment = runtime_environment - } - } - - if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) { - # reproducibility QC for IDR peaks - call reproducibility as reproducibility_idr { input : - prefix = 'idr', - peaks = select_all(idr.bfilt_idr_peak), - peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [], - peak_ppr = idr_ppr.bfilt_idr_peak, - peak_type = peak_type_, - chrsz = chrsz_, - 
runtime_environment = runtime_environment - } - } - - # Generate final QC report and JSON - call qc_report { input : - pipeline_ver = pipeline_ver, - title = title, - description = description, - genome = genome_name_, - paired_ends = paired_end_, - ctl_paired_ends = ctl_paired_end_, - pipeline_type = pipeline_type, - aligner = aligner_, - no_dup_removal = no_dup_removal, - peak_caller = peak_caller_, - cap_num_peak = cap_num_peak_, - idr_thresh = idr_thresh, - pval_thresh = pval_thresh, - xcor_trim_bp = xcor_trim_bp, - xcor_subsample_reads = xcor_subsample_reads, - - samstat_qcs = select_all(align.samstat_qc), - nodup_samstat_qcs = select_all(filter.samstat_qc), - dup_qcs = select_all(filter.dup_qc), - lib_complexity_qcs = select_all(filter.lib_complexity_qc), - xcor_plots = select_all(xcor.plot_png), - xcor_scores = select_all(xcor.score), - - ctl_samstat_qcs = select_all(align_ctl.samstat_qc), - ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc), - ctl_dup_qcs = select_all(filter_ctl.dup_qc), - ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), - - jsd_plot = jsd.plot, - jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [], - - frip_qcs = select_all(call_peak.frip_qc), - frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), - frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc), - frip_qc_pooled = call_peak_pooled.frip_qc, - frip_qc_ppr1 = call_peak_ppr1.frip_qc, - frip_qc_ppr2 = call_peak_ppr2.frip_qc, - - idr_plots = select_all(idr.idr_plot), - idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [], - idr_plot_ppr = idr_ppr.idr_plot, - frip_idr_qcs = select_all(idr.frip_qc), - frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [], - frip_idr_qc_ppr = idr_ppr.frip_qc, - frip_overlap_qcs = select_all(overlap.frip_qc), - frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [], - frip_overlap_qc_ppr = overlap_ppr.frip_qc, - idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, - overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, - - gc_plots = select_all(gc_bias.gc_plot), - - peak_region_size_qcs = select_all(call_peak.peak_region_size_qc), - peak_region_size_plots = select_all(call_peak.peak_region_size_plot), - num_peak_qcs = select_all(call_peak.num_peak_qc), - - idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc, - idr_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, - idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc, - - overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc, - overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, - overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc, - - runtime_environment = runtime_environment - } - - output { - File report = qc_report.report - File qc_json = qc_report.qc_json - Boolean qc_json_ref_match = qc_report.qc_json_ref_match - } -} - -task align { - input { - Array[File] fastqs_R1 # [merge_id] - Array[File] fastqs_R2 - File? ref_fa - Int? trim_bp # this is for R1 only - Int crop_length - Int crop_length_tol - String? trimmomatic_phred_score_format - - String aligner - - String mito_chr_name - Int? multimapping - File? custom_align_py - File? idx_tar # reference index tar - Boolean paired_end - Boolean use_bwa_mem_for_pe - Int bwa_mem_read_len_limit - Boolean use_bowtie2_local_mode - - String? 
trimmomatic_java_heap - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") - Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) - - Float trimmomatic_java_heap_factor = 0.9 - Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2]) - else transpose([fastqs_R1]) - command { - set -e - - # check if pipeline dependencies can be found - if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] - then - echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 - exit 3 - fi - python3 $(which encode_task_merge_fastq.py) \ - ${write_tsv(tmp_fastqs)} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--nth ' + cpu} - - if [ -z '${trim_bp}' ]; then - SUFFIX= - else - SUFFIX=_trimmed - python3 $(which encode_task_trim_fastq.py) \ - R1/*.fastq.gz \ - --trim-bp ${trim_bp} \ - --out-dir R1$SUFFIX - if [ '${paired_end}' == 'true' ]; then - python3 $(which encode_task_trim_fastq.py) \ - R2/*.fastq.gz \ - --trim-bp ${trim_bp} \ - --out-dir R2$SUFFIX - fi - fi - if [ '${crop_length}' == '0' ]; then - SUFFIX=$SUFFIX - else - NEW_SUFFIX="$SUFFIX"_cropped - python3 $(which encode_task_trimmomatic.py) \ - --fastq1 R1$SUFFIX/*.fastq.gz \ - ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - --crop-length ${crop_length} \ - --crop-length-tol "${crop_length_tol}" \ - ${'--phred-score-format ' + trimmomatic_phred_score_format } \ - --out-dir-R1 R1$NEW_SUFFIX \ - ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \ - ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \ - ${'--nth ' + cpu} - SUFFIX=$NEW_SUFFIX - fi - - if [ '${aligner}' == 'bwa' ]; then - python3 $(which encode_task_bwa.py) \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \ - ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - - elif [ '${aligner}' == 'bowtie2' ]; then - python3 $(which encode_task_bowtie2.py) \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${'--multimapping ' + multimapping} \ - ${if paired_end then '--paired-end' else ''} \ - ${if use_bowtie2_local_mode then '--local' else ''} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - else - python3 ${custom_align_py} \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - fi - - python3 $(which encode_task_post_align.py) \ - R1$SUFFIX/*.fastq.gz $(ls *.bam) \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - rm -rf R1 R2 R1$SUFFIX R2$SUFFIX - } - output { - File bam = glob('*.bam')[0] - File bai = glob('*.bai')[0] - File samstat_qc = glob('*.samstats.qc')[0] - File read_len_log = glob('*.read_length.txt')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} 
SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task filter { - input { - File? bam - Boolean paired_end - File? ref_fa - Boolean redact_nodup_bam - String dup_marker # picard.jar MarkDuplicates (picard) or - # sambamba markdup (sambamba) - Int mapq_thresh # threshold for low MAPQ reads removal - Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM - File chrsz # 2-col chromosome sizes file - Boolean no_dup_removal # no dupe reads removal when filtering BAM - String mito_chr_name - - Int cpu - Float mem_factor - String? picard_java_heap - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(bam, "G") - Float picard_java_heap_factor = 0.9 - Float mem_gb = 6.0 + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_filter.py) \ - ${bam} \ - ${if paired_end then '--paired-end' else ''} \ - --multimapping 0 \ - ${'--dup-marker ' + dup_marker} \ - ${'--mapq-thresh ' + mapq_thresh} \ - --filter-chrs ${sep=' ' filter_chrs} \ - ${'--chrsz ' + chrsz} \ - ${if no_dup_removal then '--no-dup-removal' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} \ - ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} - - if [ '${redact_nodup_bam}' == 'true' ]; then - python3 $(which encode_task_bam_to_pbam.py) \ - $(ls *.bam) \ - ${'--ref-fa ' + ref_fa} \ - '--delete-original-bam' - fi - } - output { - File nodup_bam = glob('*.bam')[0] - File nodup_bai = glob('*.bai')[0] - File samstat_qc = glob('*.samstats.qc')[0] - File dup_qc = glob('*.dup.qc')[0] - File lib_complexity_qc = glob('*.lib_complexity.qc')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task bam2ta { - input { - File? bam - Boolean paired_end - String mito_chr_name # mito chromosome name - Int subsample # number of reads to subsample TAGALIGN - # this affects all downstream analysis - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(bam, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_bam2ta.py) \ - ${bam} \ - --disable-tn5-shift \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--subsample ' + subsample} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - } - output { - File ta = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task spr { - input { - File? 
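
The align task above threads its intermediate FASTQ directories through a shell variable rather than overwriting in place: merged reads land in R1/ (and R2/ for paired-end runs), optional 5'-trimming writes R1_trimmed/, and optional Trimmomatic cropping writes R1_trimmed_cropped/, with SUFFIX always naming the directory the next stage reads from. A minimal editorial sketch of the idiom (not taken from the patch; task and file names are hypothetical):

version 1.0

# Editorial sketch: the directory-suffix chaining used by align,
# reduced to its skeleton.
task suffix_chain_demo {
    input {
        Boolean do_trim = true
        Boolean do_crop = true
    }
    command {
        mkdir R1 && touch R1/reads.fastq.gz
        SUFFIX=
        if [ '${do_trim}' == 'true' ]; then
            SUFFIX=_trimmed
            mkdir R1$SUFFIX && cp R1/*.fastq.gz R1$SUFFIX/
        fi
        if [ '${do_crop}' == 'true' ]; then
            NEW_SUFFIX="$SUFFIX"_cropped
            mkdir R1$NEW_SUFFIX && cp R1$SUFFIX/*.fastq.gz R1$NEW_SUFFIX/
            SUFFIX=$NEW_SUFFIX
        fi
        # whatever runs next always reads R1$SUFFIX, the newest stage
        ls R1$SUFFIX/
    }
    output {
        Array[String] staged = read_lines(stdout())
    }
}

Chaining the suffix keeps every stage's output in its own inspectable directory, which is why the real task can clean up with a single rm -rf over the directories it created.
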
ta - Boolean paired_end - Int pseudoreplication_random_seed - - Float mem_factor - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_spr.py) \ - ${ta} \ - ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \ - ${if paired_end then '--paired-end' else ''} - } - output { - File ta_pr1 = glob('*.pr1.tagAlign.gz')[0] - File ta_pr2 = glob('*.pr2.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task pool_ta { - input { - Array[File?] tas - Int? col # number of columns in pooled TA - String? prefix # basename prefix - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_pool_ta.py) \ - ${sep=' ' select_all(tas)} \ - ${'--prefix ' + prefix} \ - ${'--col ' + col} - } - output { - File ta_pooled = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '8 GB' - time : 4 - disks : 'local-disk 100 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task xcor { - input { - File? ta - Boolean paired_end - String mito_chr_name - Int subsample # number of reads to subsample TAGALIGN - # this will be used for xcor only - # will not affect any downstream analysis - String? chip_seq_type - Int? exclusion_range_min - Int? exclusion_range_max - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 8.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_xcor.py) \ - ${ta} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--subsample ' + subsample} \ - ${'--chip-seq-type ' + chip_seq_type} \ - ${'--exclusion-range-min ' + exclusion_range_min} \ - ${'--exclusion-range-max ' + exclusion_range_max} \ - ${'--subsample ' + subsample} \ - ${'--nth ' + cpu} - } - output { - File plot_pdf = glob('*.cc.plot.pdf')[0] - File plot_png = glob('*.cc.plot.png')[0] - File score = glob('*.cc.qc')[0] - File fraglen_log = glob('*.cc.fraglen.txt')[0] - Int fraglen = read_int(fraglen_log) - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task jsd { - input { - Array[File?] nodup_bams - Array[File?] ctl_bams - File? 
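
spr, xcor, and the other tasks in this file all size their resources the same way: memory is a base amount plus mem_factor times the input size in GB, disk is round(base + disk_factor × input size), and samtools-backed tasks pass only 0.8 × mem_gb down to the tool to leave headroom. A self-contained editorial sketch of the idiom (task name and default factors are hypothetical, not from the patch):

version 1.0

# Editorial sketch: the input-scaled resource-sizing idiom shared by
# these tasks.
task sized_task {
    input {
        File bam
        Float mem_factor = 0.5   # hypothetical default: GB of memory per GB of input
        Float disk_factor = 2.0  # hypothetical default: GB of disk per GB of input
    }
    Float input_file_size_gb = size(bam, "G")
    Float mem_gb = 4.0 + mem_factor * input_file_size_gb
    Float samtools_mem_gb = 0.8 * mem_gb  # headroom for the wrapper process
    Int disk_gb = round(20.0 + disk_factor * input_file_size_gb)
    command {
        echo "mem=${mem_gb}GB samtools=${samtools_mem_gb}GB disk=${disk_gb}GB"
    }
    output {
        String sizing = read_string(stdout())
    }
    runtime {
        memory : '${mem_gb} GB'
        disks : 'local-disk ${disk_gb} SSD'
    }
}

With a 10 GB BAM and these hypothetical factors, the task would request 4.0 + 0.5 × 10 = 9 GB of memory and round(20 + 2 × 10) = 40 GB of disk.
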
blacklist - Int mapq_thresh - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") - Float mem_gb = 5.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_jsd.py) \ - ${sep=' ' select_all(nodup_bams)} \ - ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ - ${'--mapq-thresh '+ mapq_thresh} \ - ${'--blacklist '+ blacklist} \ - ${'--nth ' + cpu} - } - output { - File plot = glob('*.png')[0] - Array[File] jsd_qcs = glob('*.jsd.qc') - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task choose_ctl { - input { - Array[File?] tas - Array[File?] ctl_tas - File? ta_pooled - File? ctl_ta_pooled - Boolean always_use_pooled_ctl # always use pooled control for all exp rep. - Float ctl_depth_ratio # if ratio between controls is higher than this - # then always use pooled control for all exp rep. - Int ctl_depth_limit - Float exp_ctl_depth_ratio_limit - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_choose_ctl.py) \ - --tas ${sep=' ' select_all(tas)} \ - --ctl-tas ${sep=' ' select_all(ctl_tas)} \ - ${'--ta-pooled ' + ta_pooled} \ - ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ - ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ - ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ - ${'--ctl-depth-limit ' + ctl_depth_limit} \ - ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} - } - output { - File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] - File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] - File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] - Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) - Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) - Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task count_signal_track { - input { - File? ta # tag-align - File chrsz # 2-col chromosome sizes file - - RuntimeEnvironment runtime_environment - } - Float mem_gb = 8.0 - - command { - set -e - python3 $(which encode_task_count_signal_track.py) \ - ${ta} \ - ${'--chrsz ' + chrsz} \ - ${'--mem-gb ' + mem_gb} - } - output { - File pos_bw = glob('*.positive.bigwig')[0] - File neg_bw = glob('*.negative.bigwig')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task subsample_ctl { - input { - File? 
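
choose_ctl's outputs use a compact integer code, spelled out in the workflow's scatter above: an id >= 0 selects that control tagAlign by index, -1 means "use the pooled control", and -2 means "no control available"; a positive per-replicate subsample count additionally routes the chosen control through subsample_ctl first. A reduced editorial sketch of just the decode step (the workflow name is hypothetical; the selection expression mirrors the one in the workflow):

version 1.0

# Editorial sketch: decoding one chosen_ctl id into an optional
# control file, as the per-replicate scatter does.
workflow decode_ctl_demo {
    input {
        Int chosen_ctl_ta_id    # >=0: index, -1: pooled, -2: none
        Array[File] ctl_tas
        File? ctl_ta_pooled
    }
    Array[File] chosen = if chosen_ctl_ta_id <= -2 then []
        else if chosen_ctl_ta_id == -1 then [ select_first([ctl_ta_pooled]) ]
        else [ ctl_tas[chosen_ctl_ta_id] ]
    output {
        # empty when there is no control; never more than one element
        Array[File] chosen_ctl_tas = chosen
    }
}
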
ta - Boolean paired_end - Int subsample - - Float mem_factor - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - python3 $(which encode_task_subsample_ctl.py) \ - ${ta} \ - ${'--subsample ' + subsample} \ - ${if paired_end then '--paired-end' else ''} \ - } - output { - File ta_subsampled = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task call_peak { - input { - String peak_caller - String peak_type - Array[File?] tas # [ta, control_ta]. control_ta is optional - Int fraglen # fragment length from xcor - String gensz # Genome size (sum of entries in 2nd column of - # chr. sizes file, or hs for human, ms for mouse) - File chrsz # 2-col chromosome sizes file - Int cap_num_peak # cap number of raw peaks called from MACS2 - Float pval_thresh # p.value threshold for MACS2 - Float? fdr_thresh # FDR threshold for SPP - - File? blacklist # blacklist BED to filter raw peaks - String? regex_bfilt_peak_chr_name - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(tas, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - - if [ '${peak_caller}' == 'macs2' ]; then - python3 $(which encode_task_macs2_chip.py) \ - ${sep=' ' select_all(tas)} \ - ${'--gensz '+ gensz} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--cap-num-peak ' + cap_num_peak} \ - ${'--pval-thresh '+ pval_thresh} \ - ${'--mem-gb ' + mem_gb} - - elif [ '${peak_caller}' == 'spp' ]; then - python3 $(which encode_task_spp.py) \ - ${sep=' ' select_all(tas)} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--cap-num-peak ' + cap_num_peak} \ - ${'--fdr-thresh '+ fdr_thresh} \ - ${'--nth ' + cpu} - fi - - python3 $(which encode_task_post_call_peak_chip.py) \ - $(ls *Peak.gz) \ - ${'--ta ' + tas[0]} \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--peak-type ' + peak_type} \ - ${'--blacklist ' + blacklist} - } - output { - File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - # generated by post_call_peak py - File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File frip_qc = glob('*.frip.qc')[0] - File peak_region_size_qc = glob('*.peak_region_size.qc')[0] - File peak_region_size_plot = glob('*.peak_region_size.png')[0] - File num_peak_qc = glob('*.num_peak.qc')[0] - } - runtime { - cpu : if peak_caller == 'macs2' then 2 else cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task macs2_signal_track { - input { - Array[File?] tas # [ta, control_ta]. 
control_ta is optional - Int fraglen # fragment length from xcor - String gensz # Genome size (sum of entries in 2nd column of - # chr. sizes file, or hs for human, ms for mouse) - File chrsz # 2-col chromosome sizes file - Float pval_thresh # p.value threshold - - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(tas, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_macs2_signal_track_chip.py) \ - ${sep=' ' select_all(tas)} \ - ${'--gensz '+ gensz} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--pval-thresh '+ pval_thresh} \ - ${'--mem-gb ' + mem_gb} - } - output { - File pval_bw = glob('*.pval.signal.bigwig')[0] - File fc_bw = glob('*.fc.signal.bigwig')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task idr { - input { - String prefix # prefix for IDR output file - File? peak1 - File? peak2 - File? peak_pooled - Float idr_thresh - File? blacklist # blacklist BED to filter raw peaks - String regex_bfilt_peak_chr_name - # parameters to compute FRiP - File? ta # to calculate FRiP - Int? fraglen # fragment length from xcor - File chrsz # 2-col chromosome sizes file - String peak_type - String rank - - RuntimeEnvironment runtime_environment - } - - command { - set -e - ${if defined(ta) then '' else 'touch null.frip.qc'} - touch null - python3 $(which encode_task_idr.py) \ - ${peak1} ${peak2} ${peak_pooled} \ - ${'--prefix ' + prefix} \ - ${'--idr-thresh ' + idr_thresh} \ - ${'--peak-type ' + peak_type} \ - --idr-rank ${rank} \ - ${'--fraglen ' + fraglen} \ - ${'--chrsz ' + chrsz} \ - ${'--blacklist '+ blacklist} \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--ta ' + ta} - } - output { - File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File idr_plot = glob('*.txt.png')[0] - File idr_unthresholded_peak = glob('*.txt.gz')[0] - File idr_log = glob('*.idr*.log')[0] - File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task overlap { - input { - String prefix # prefix for IDR output file - File? peak1 - File? peak2 - File? peak_pooled - File? blacklist # blacklist BED to filter raw peaks - String regex_bfilt_peak_chr_name - # parameters to compute FRiP - File? ta # to calculate FRiP - Int? 
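
call_peak and idr above (and overlap just below) tell a raw peak file apart from its blacklist-filtered sibling by filename alone: `*.bfilt.<peak_type>.gz` matches the filtered copy, while `*[!.][!b][!f][!i][!l][!t].<peak_type>.gz` accepts only names whose six characters before the extension do not spell ".bfilt". A toy editorial task demonstrating the two patterns (file names are hypothetical, not from the patch):

version 1.0

# Editorial sketch: the bfilt-excluding glob idiom.
task glob_demo {
    input {
        String peak_type = "narrowPeak"
    }
    command {
        touch rep1.pooled.${peak_type}.gz rep1.bfilt.${peak_type}.gz
    }
    output {
        # matches rep1.pooled.narrowPeak.gz: "pooled" has no "." in
        # slot 1, no "b" in slot 2, ..., no "t" in slot 6
        File raw = glob('*[!.][!b][!f][!i][!l][!t].' + peak_type + '.gz')[0]
        # matches rep1.bfilt.narrowPeak.gz only
        File bfilt = glob('*.bfilt.' + peak_type + '.gz')[0]
    }
}
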
fraglen # fragment length from xcor (for FRIP) - File chrsz # 2-col chromosome sizes file - String peak_type - - RuntimeEnvironment runtime_environment - } - - command { - set -e - ${if defined(ta) then '' else 'touch null.frip.qc'} - touch null - python3 $(which encode_task_overlap.py) \ - ${peak1} ${peak2} ${peak_pooled} \ - ${'--prefix ' + prefix} \ - ${'--peak-type ' + peak_type} \ - ${'--fraglen ' + fraglen} \ - ${'--chrsz ' + chrsz} \ - ${'--blacklist '+ blacklist} \ - --nonamecheck \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--ta ' + ta} - } - output { - File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task reproducibility { - input { - String prefix - Array[File] peaks # peak files from pair of true replicates - # in a sorted order. for example of 4 replicates, - # 1,2 1,3 1,4 2,3 2,4 3,4. - # x,y means peak file from rep-x vs rep-y - Array[File] peaks_pr # peak files from pseudo replicates - File? peak_ppr # Peak file from pooled pseudo replicate. - String peak_type - File chrsz # 2-col chromosome sizes file - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_reproducibility.py) \ - ${sep=' ' peaks} \ - --peaks-pr ${sep=' ' peaks_pr} \ - ${'--peak-ppr '+ peak_ppr} \ - --prefix ${prefix} \ - ${'--peak-type ' + peak_type} \ - ${'--chrsz ' + chrsz} - } - output { - File optimal_peak = glob('*optimal_peak.*.gz')[0] - File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] - File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] - File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] - File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] - File conservative_peak = glob('*conservative_peak.*.gz')[0] - File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] - File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] - File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] - File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] - File reproducibility_qc = glob('*reproducibility.qc')[0] - # QC metrics for optimal peak - File peak_region_size_qc = glob('*.peak_region_size.qc')[0] - File peak_region_size_plot = glob('*.peak_region_size.png')[0] - File num_peak_qc = glob('*.num_peak.qc')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task gc_bias { - input { - File? nodup_bam - File ref_fa - - String? 
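
reproducibility's peaks input expects one file per unordered pair of true replicates in a fixed order (for four replicates: 1,2 1,3 1,4 2,3 2,4 3,4, per the comment above). The workflow produces exactly that order by scattering over cross(range(num_rep), range(num_rep)) and keeping only the upper-triangle pairs with pair.left < pair.right. A reduced editorial sketch (workflow name is hypothetical):

version 1.0

# Editorial sketch: enumerating the upper triangle of replicate pairs,
# as the per-pair IDR/overlap scatter does.
workflow pairs_demo {
    input {
        Int num_rep = 4
    }
    scatter (pair in cross(range(num_rep), range(num_rep))) {
        if (pair.left < pair.right) {
            String label = "rep~{pair.left + 1}_vs_rep~{pair.right + 1}"
        }
    }
    output {
        # ["rep1_vs_rep2", "rep1_vs_rep3", "rep1_vs_rep4",
        #  "rep2_vs_rep3", "rep2_vs_rep4", "rep3_vs_rep4"]
        Array[String] pair_labels = select_all(label)
    }
}
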
picard_java_heap - - RuntimeEnvironment runtime_environment - } - Float mem_factor = 0.3 - Float input_file_size_gb = size(nodup_bam, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Float picard_java_heap_factor = 0.9 - - command { - set -e - python3 $(which encode_task_gc_bias.py) \ - ${'--nodup-bam ' + nodup_bam} \ - ${'--ref-fa ' + ref_fa} \ - ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} - } - output { - File gc_plot = glob('*.gc_plot.png')[0] - File gc_log = glob('*.gc.txt')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 6 - disks : 'local-disk 250 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task qc_report { - input { - # optional metadata - String pipeline_ver - String title # name of sample - String description # description for sample - String? genome - #String? encode_accession_id # ENCODE accession ID of sample - # workflow params - Array[Boolean] paired_ends - Array[Boolean] ctl_paired_ends - String pipeline_type - String aligner - Boolean no_dup_removal - String peak_caller - Int cap_num_peak - Float idr_thresh - Float pval_thresh - Int xcor_trim_bp - Int xcor_subsample_reads - # QCs - Array[File] samstat_qcs - Array[File] nodup_samstat_qcs - Array[File] dup_qcs - Array[File] lib_complexity_qcs - Array[File] ctl_samstat_qcs - Array[File] ctl_nodup_samstat_qcs - Array[File] ctl_dup_qcs - Array[File] ctl_lib_complexity_qcs - Array[File] xcor_plots - Array[File] xcor_scores - File? jsd_plot - Array[File] jsd_qcs - Array[File] idr_plots - Array[File] idr_plots_pr - File? idr_plot_ppr - Array[File] frip_qcs - Array[File] frip_qcs_pr1 - Array[File] frip_qcs_pr2 - File? frip_qc_pooled - File? frip_qc_ppr1 - File? frip_qc_ppr2 - Array[File] frip_idr_qcs - Array[File] frip_idr_qcs_pr - File? frip_idr_qc_ppr - Array[File] frip_overlap_qcs - Array[File] frip_overlap_qcs_pr - File? frip_overlap_qc_ppr - File? idr_reproducibility_qc - File? overlap_reproducibility_qc - - Array[File] gc_plots - - Array[File] peak_region_size_qcs - Array[File] peak_region_size_plots - Array[File] num_peak_qcs - - File? idr_opt_peak_region_size_qc - File? idr_opt_peak_region_size_plot - File? idr_opt_num_peak_qc - - File? overlap_opt_peak_region_size_qc - File? overlap_opt_peak_region_size_plot - File? overlap_opt_num_peak_qc - - File? 
qc_json_ref - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_qc_report.py) \ - --pipeline-prefix chip \ - ${'--pipeline-ver ' + pipeline_ver} \ - ${"--title '" + sub(title,"'","_") + "'"} \ - ${"--desc '" + sub(description,"'","_") + "'"} \ - ${'--genome ' + genome} \ - ${'--multimapping ' + 0} \ - --paired-ends ${sep=' ' paired_ends} \ - --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ - --pipeline-type ${pipeline_type} \ - --aligner ${aligner} \ - ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ - --peak-caller ${peak_caller} \ - ${'--cap-num-peak ' + cap_num_peak} \ - --idr-thresh ${idr_thresh} \ - --pval-thresh ${pval_thresh} \ - --xcor-trim-bp ${xcor_trim_bp} \ - --xcor-subsample-reads ${xcor_subsample_reads} \ - --samstat-qcs ${sep='_:_' samstat_qcs} \ - --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ - --dup-qcs ${sep='_:_' dup_qcs} \ - --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ - --xcor-plots ${sep='_:_' xcor_plots} \ - --xcor-scores ${sep='_:_' xcor_scores} \ - --idr-plots ${sep='_:_' idr_plots} \ - --idr-plots-pr ${sep='_:_' idr_plots_pr} \ - --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ - --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ - --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ - --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ - ${'--jsd-plot ' + jsd_plot} \ - --jsd-qcs ${sep='_:_' jsd_qcs} \ - ${'--idr-plot-ppr ' + idr_plot_ppr} \ - --frip-qcs ${sep='_:_' frip_qcs} \ - --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ - --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ - ${'--frip-qc-pooled ' + frip_qc_pooled} \ - ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ - ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ - --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ - --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ - ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ - --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ - --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ - ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ - ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ - ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ - --gc-plots ${sep='_:_' gc_plots} \ - --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ - --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ - --num-peak-qcs ${sep='_:_' num_peak_qcs} \ - ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ - ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ - ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ - ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ - ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ - ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ - --out-qc-html qc.html \ - --out-qc-json qc.json \ - ${'--qc-json-ref ' + qc_json_ref} - } - output { - File report = glob('*qc.html')[0] - File qc_json = glob('*qc.json')[0] - Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -### workflow system tasks -task read_genome_tsv { - input { - File? genome_tsv - String? 
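
qc_report guards its shell command against embedded quotes: `sub(title, "'", "_")` rewrites any single quote in the user-supplied title to an underscore before the value is spliced between single quotes on the command line, so a title like "Kelly's ChIP" cannot terminate the quoting early. A one-expression editorial sketch (workflow name is hypothetical):

version 1.0

# Editorial sketch: neutralizing single quotes before building a
# single-quoted shell argument.
workflow quoting_demo {
    input {
        String title = "Kelly's ChIP"
    }
    output {
        # yields: --title 'Kelly_s ChIP'
        String title_arg = "--title '" + sub(title, "'", "_") + "'"
    }
}
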
null_s
-
-        RuntimeEnvironment runtime_environment
-    }
-    command <<<
-        echo "$(basename ~{genome_tsv})" > genome_name
-        # create empty files for all entries
-        touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2
-        touch mito_chr_name
-        touch regex_bfilt_peak_chr_name
-
-        python <<CODE
-        import os
-        with open('~{genome_tsv}', 'r') as fp:
-            for line in fp:
-                arr = line.strip('\n').split('\t')
-                if arr:
-                    # write each genome TSV value to a file named after its key
-                    key, val = arr
-                    with open(key, 'w') as fp2:
-                        fp2.write(val)
-        CODE
-    >>>
-    output {
-        String? genome_name = read_string('genome_name')
-        String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa')
-        String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar')
-        String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar')
-        String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz')
-        String? gensz = if size('gensz')==0 then null_s else read_string('gensz')
-        String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist')
-        String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2')
-        String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name')
-        String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+'
-            else read_string('regex_bfilt_peak_chr_name')
-    }
-    runtime {
-        maxRetries : 0
-        cpu : 1
-        memory : '2 GB'
-        time : 4
-        disks : 'local-disk 10 SSD'
-
-        docker : runtime_environment.docker
-        singularity : runtime_environment.singularity
-        conda : runtime_environment.conda
-    }
-}
-
-task rounded_mean {
-    input {
-        Array[Int] ints
-
-        RuntimeEnvironment runtime_environment
-    }
-    command <<<
-        python <<CODE
-        arr = [~{sep=',' ints}]
-        with open('tmp.txt', 'w') as fp:
-            if len(arr):
-                # arithmetic mean of the ints, rounded to the nearest integer
-                sum_ = sum(arr)
-                mean_ = sum_ / float(len(arr))
-                fp.write('{}'.format(int(round(mean_))))
-            else:
-                fp.write('0')
-        CODE
-    >>>
-    output {
-        Int rounded_mean = read_int('tmp.txt')
-    }
-    runtime {
-        cpu : 1
-        memory : '2 GB'
-        time : 4
-        disks : 'local-disk 10 SSD'
-
-        docker : runtime_environment.docker
-        singularity : runtime_environment.singularity
-        conda : runtime_environment.conda
-    }
-}
-
-task raise_exception {
-    input {
-        String msg
-
-        RuntimeEnvironment runtime_environment
-    }
-    command {
-        echo -e "\n* Error: ${msg}\n" >&2
-        exit 2
-    }
-    output {
-        String error_msg = '${msg}'
-    }
-    runtime {
-        maxRetries : 0
-        cpu : 1
-        memory : '2 GB'
-        time : 4
-        disks : 'local-disk 10 SSD'
-
-        docker : runtime_environment.docker
-        singularity : runtime_environment.singularity
-        conda : runtime_environment.conda
-    }
-}
\ No newline at end of file
diff --git a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl
deleted file mode 100644
index 281e31443..000000000
--- a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.formatted.wdl
+++ /dev/null
@@ -1,7 +0,0 @@
-## # Header
-# regular comment will be left as is
-## part of preamble
-
-#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing
-#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput
-version 1.2
diff --git a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl b/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl
deleted file mode 100644
index 81faa4fa6..000000000
--- a/backup/wdl-format-old-2/tests/format/clays_complex_script/source.wdl
+++ /dev/null
@@ -1,165 +0,0 @@
-## # Header
-# regular comment will be left as is
-#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing
-#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput
-
-## part of preamble
-version 1.2
-
-#@ except: MissingMetas
-struct AStruct {
-    String member
-}
-
-task a_task {
-    meta
-    # Here is a comment between `meta` and the parenthesis.
- { - # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. - { - # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - input - # Here is a comment before the input. - { - Object an_object - String a_string - Boolean a_boolean - Int an_integer - Float a_float - AStruct a_struct # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - } - - command <<< >>> - - output - # Here is a comment before the output. - { - Object some_other_object = {} - String some_other_string = "foo bar baz" - Boolean some_other_boolean = true - Int some_other_integer = 42 - Float some_other_float = 0e3 - # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - AStruct some_other_struct = AStruct {} - } - - requirements - # This is a comment before the requirements. - { - container: "ubuntu:latest" - } - - hints { - max_cpu: 1 - } -} - -## These double-pound-sign comments -## should be converted to single-pound-sign comments. -workflow hello { - meta - # Here is a comment between `meta` and the parenthesis. - { - # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. - { - # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment. - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - input { - Object an_object - String a_string - Boolean a_boolean - Int an_integer - Float a_float - AStruct a_struct # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - } - - call a_task { - } - - scatter (name in name_array) { - call say_task { greeting = greeting } - } - - if (some_condition_task) { - call a_task as task_two {} - } - - output - # Here is a comment before the output. - { - Object some_other_object = {} - String some_other_string = "foo bar baz" - Boolean some_other_boolean = true - Int some_other_integer = 42 - Float some_other_float = 0e3 - # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. 
- AStruct some_other_struct = AStruct {} - } -} \ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl deleted file mode 100644 index 7c8de0324..000000000 --- a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.formatted.wdl +++ /dev/null @@ -1 +0,0 @@ -version 1.0 diff --git a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl b/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl deleted file mode 100644 index 7e3333f0a..000000000 --- a/backup/wdl-format-old-2/tests/format/complex_meta_and_calls/source.wdl +++ /dev/null @@ -1,106 +0,0 @@ -version -1.0 -workflow -test_wf -{ -input -{ -SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { -noncanonical_motifs: 30, -GT_AG_and_CT_AC_motif: 12, -} -} -parameter_meta -{ -out_sj_filter_overhang_min: { -type: "SpliceJunctionMotifs", -label: "Minimum overhang required to support a splicing junction" -} -} -output -{ -SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min -String a = "friend" -Int b = 1 + 2 -String c = "Hello, ~{a}" -Map[String, Int] d = { "a": 0, "b": 1, "c": 2} -} -meta { -a: "hello" -b: 'world' -c: 5 -d: -0xf -e: 1.0e10 -f: -2. -g: true -h: false -i: null -j: { -a: [1, 2, 3], -b: ["hello", "world", "!"], -c: { -x: 1, -y: 2, -z: 3 -} -} -k: [ -{ -a: {}, -b: 0, -c: "", -d: '', -e: [], -}, -{ -x: [1.0, 2.0, 3.0] -} -] -} -call no_params -call with_params { input: a, b, c, d = 1 } -call qualified.name -call qualified.name { input: a = 1, b = 2, c = "3" } -call aliased as x -call aliased as x { input: } -call f after x after y -call f after x after y { input: a = [] } -call f as x after x -call f as x after x after y { input: name = "hello" } -call test_task as foo { -input: bowchicka = "wowwow" -} -if ( -true -) { - -call test_task after foo { -input: bowchicka = "bowchicka" -} -scatter (i in range(3)) { -call test_task as bar { -input: bowchicka = i * 42 -} -} -} - -} -task -test_task -{ -command <<<>>> -input { -String bowchicka -} -parameter_meta { -bowchicka: { -type: "String", -label: "Bowchicka" -} -} -} - -struct SpliceJunctionMotifs { -Int noncanonical_motifs -Int GT_AG_and_CT_AC_motif -} diff --git a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl deleted file mode 100644 index 6a2d1da70..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.formatted.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 - -# fileA 1.1 -import # fileA 1.2 - # fileA 2.1 - # fileA 2.2 - "fileA.wdl" # fileA 2.3 - # fileA 3.1 - as # fileA 3.2 - # fileA 4.1 - bar # fileA 4.2 - # fileA 5.1 - alias # fileA 5.2 - # fileA 6.1 - qux # fileA 6.2 - # fileA 7.1 - as # fileA 7.2 - # fileA 8.1 - Qux # fileA 8.2 -# this comment belongs to fileB -import "fileB.wdl" as foo # also fileB -# this comment belongs to fileC -import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl deleted file mode 100644 index 1c32809f6..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_both_comments/source.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 -# this comment belongs to fileB -import "fileB.wdl" as foo # also fileB -# fileA 1.1 -import # fileA 
1.2 -# fileA 2.1 -# fileA 2.2 -"fileA.wdl" # fileA 2.3 -# fileA 3.1 -as # fileA 3.2 -# fileA 4.1 -bar # fileA 4.2 -# fileA 5.1 -alias # fileA 5.2 -# fileA 6.1 -qux # fileA 6.2 -# fileA 7.1 -as # fileA 7.2 -# fileA 8.1 -Qux # fileA 8.2 -workflow test {} -# this comment belongs to fileC -import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl deleted file mode 100644 index e23115af6..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.formatted.wdl +++ /dev/null @@ -1,12 +0,0 @@ -version 1.0 - -import # fileA 1 - "fileA.wdl" # fileA 2 - as # fileA 3 - bar # fileA 4 - alias # fileA 5 - qux # fileA 6 - as # fileA 7 - Qux # fileA 8 -import "fileB.wdl" as foo # fileB -import "fileC.wdl" # fileC diff --git a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl deleted file mode 100644 index f633e72d8..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_inline_comments/source.wdl +++ /dev/null @@ -1,12 +0,0 @@ -version 1.0 -import "fileB.wdl" as foo # fileB -workflow test {} -import "fileC.wdl" # fileC -import # fileA 1 -"fileA.wdl" # fileA 2 -as # fileA 3 -bar # fileA 4 -alias # fileA 5 -qux # fileA 6 -as # fileA 7 -Qux # fileA 8 diff --git a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl deleted file mode 100644 index 564a6c05f..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.formatted.wdl +++ /dev/null @@ -1,5 +0,0 @@ -version 1.1 - -import "fileA.wdl" as bar alias cows as horses alias cats as dogs -import "fileB.wdl" as foo -import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl deleted file mode 100644 index e69a1a727..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_no_comments/source.wdl +++ /dev/null @@ -1,7 +0,0 @@ - version 1.1 - - import "fileB.wdl" as foo - import "fileA.wdl" as bar alias cows as horses - alias cats as dogs - workflow test {} - import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl deleted file mode 100644 index 8b07048e9..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.formatted.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 - -# fileA 1 -import - # fileA 2.1 - # fileA 2.2 - "fileA.wdl" - # fileA 3 - as - # fileA 4 - bar - # fileA 5 - alias - # fileA 6 - qux - # fileA 7 - as - # fileA 8 - Qux -# this comment belongs to fileB -import "fileB.wdl" as foo -# this comment belongs to fileC -import "fileC.wdl" diff --git a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl b/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl deleted file mode 100644 index a27e7a4fc..000000000 --- a/backup/wdl-format-old-2/tests/format/imports_with_preceding_comments/source.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 -workflow test {} -# this comment belongs to fileC -import "fileC.wdl" -# 
this comment belongs to fileB -import "fileB.wdl" as foo -# fileA 1 -import -# fileA 2.1 -# fileA 2.2 -"fileA.wdl" -# fileA 3 -as -# fileA 4 -bar -# fileA 5 -alias -# fileA 6 -qux -# fileA 7 -as -# fileA 8 -Qux diff --git a/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl deleted file mode 100644 index 6cd003333..000000000 --- a/backup/wdl-format-old-2/tests/format/interrupt_example/source.formatted.wdl +++ /dev/null @@ -1,2 +0,0 @@ -version # interrupt - 1.2 # how far should '1.2' be indented? diff --git a/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl b/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl deleted file mode 100644 index 30e667287..000000000 --- a/backup/wdl-format-old-2/tests/format/interrupt_example/source.wdl +++ /dev/null @@ -1,10 +0,0 @@ -version # interrupt -1.2 # how far should '1.2' be indented? - -workflow -# interrupt -test # should this be indented? -# interrupt -{ meta # interrupt -{ # how far should this bracket be indented? -}} \ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt b/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt deleted file mode 100644 index 335221306..000000000 --- a/backup/wdl-format-old-2/tests/format/seaseq-case/LICENSE.txt +++ /dev/null @@ -1,205 +0,0 @@ -'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl -on the date 08-05-2024. -It was accompanied by the following license: - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl b/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl deleted file mode 100644 index a25a05879..000000000 --- a/backup/wdl-format-old-2/tests/format/seaseq-case/source.formatted.wdl +++ /dev/null @@ -1,17 +0,0 @@ -version 1.0 - -import "workflows/tasks/bedtools.wdl" -import "workflows/tasks/bowtie.wdl" -import "workflows/tasks/fastqc.wdl" -import "workflows/tasks/macs.wdl" -import "workflows/tasks/rose.wdl" -import "workflows/tasks/runspp.wdl" -import "workflows/tasks/samtools.wdl" -import "workflows/tasks/seaseq_util.wdl" as util -import "workflows/tasks/sicer.wdl" -import "workflows/tasks/sortbed.wdl" -import "workflows/tasks/sratoolkit.wdl" as sra -import "workflows/workflows/bamtogff.wdl" -import "workflows/workflows/mapping.wdl" -import "workflows/workflows/motifs.wdl" -import "workflows/workflows/visualization.wdl" as viz diff --git a/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl b/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl deleted file mode 100644 index 94c76656e..000000000 --- a/backup/wdl-format-old-2/tests/format/seaseq-case/source.wdl +++ /dev/null @@ -1,898 +0,0 @@ -version 1.0 -import "workflows/tasks/fastqc.wdl" -import "workflows/tasks/bedtools.wdl" -import "workflows/tasks/bowtie.wdl" -import "workflows/tasks/samtools.wdl" -import "workflows/tasks/macs.wdl" -import "workflows/workflows/bamtogff.wdl" -import "workflows/tasks/sicer.wdl" -import "workflows/workflows/motifs.wdl" -import "workflows/tasks/rose.wdl" -import "workflows/tasks/seaseq_util.wdl" as util -import "workflows/workflows/visualization.wdl" as viz -import "workflows/workflows/mapping.wdl" -import "workflows/tasks/runspp.wdl" -import "workflows/tasks/sortbed.wdl" -import "workflows/tasks/sratoolkit.wdl" as sra - -workflow seaseq { - String pipeline_ver = 'v2.0.0' - - meta { - title: 'SEAseq Analysis' - summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline' - description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.' - version: '2.0.0' - details: { - citation: 'https://doi.org/10.1186/s12859-022-04588-z', - contactEmail: 'modupeore.adetunji@stjude.org', - contactOrg: "St Jude Children's Research Hospital", - contactUrl: "", - upstreamLicenses: "MIT", - upstreamUrl: 'https://github.com/stjude/seaseq', - whatsNew: [ - { - version: "2.0", - changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"] - } - ] - } - parameter_group: { - reference_genome: { - title: 'Reference genome', - description: 'Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .', - help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.' - }, - input_genomic_data: { - title: 'Input FASTQ data', - description: 'Genomic input files for experiment.', - help: 'Input one or more sample data and/or SRA identifiers.' - }, - analysis_parameter: { - title: 'Analysis parameter', - description: 'Analysis settings needed for experiment.', - help: 'Analysis settings; such output analysis file name.' - } - } - } - input { - # group: reference_genome - File reference - File? spikein_reference - File? blacklist - File gtf - Array[File]? bowtie_index - Array[File]? spikein_bowtie_index - Array[File]? motif_databases - - # group: input_genomic_data - Array[String]? 
sample_sraid - Array[File]? sample_fastq - - # group: analysis_parameter - String? results_name - Boolean run_motifs=true - - } - - parameter_meta { - reference: { - description: 'Reference FASTA file', - group: 'reference_genome', - patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] - } - blacklist: { - description: 'Blacklist file in BED format', - group: 'reference_genome', - help: 'If defined, blacklist regions listed are excluded after reference alignment.', - patterns: ["*.bed", "*.bed.gz"] - } - gtf: { - description: 'gene annotation file (.gtf)', - group: 'reference_genome', - help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', - patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] - } - bowtie_index: { - description: 'bowtie v1 index files (*.ebwt)', - group: 'reference_genome', - help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', - patterns: ["*.ebwt"] - } - motif_databases: { - description: 'One or more of the MEME suite motif databases (*.meme)', - group: 'reference_genome', - help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', - patterns: ["*.meme"] - } - sample_sraid: { - description: 'One or more sample SRA (Sequence Read Archive) run identifiers', - group: 'input_genomic_data', - help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', - example: 'SRR12345678' - } - sample_fastq: { - description: 'One or more sample FASTQs', - group: 'input_genomic_data', - help: 'Upload zipped FASTQ files.', - patterns: ["*.fq.gz", "*.fastq.gz"] - } - results_name: { - description: 'Experiment results custom name', - group: 'analysis_parameter', - help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', - example: 'AllMerge_mapped' - } - run_motifs: { - description: 'Perform Motif Analysis', - group: 'analysis_parameter', - help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', - example: true - } - } - -### ---------------------------------------- ### -### ------------ S E C T I O N 1 ----------- ### -### ------ Pre-process Analysis Files ------ ### -### ---------------------------------------- ### - - # Process SRRs - if ( defined(sample_sraid) ) { - # Download sample file(s) from SRA database - # outputs: - # fastqdump.fastqfile : downloaded sample files in fastq.gz format - Array[String] string_sra = [1] #buffer to allow for sra_id optionality - Array[String] s_sraid = select_first([sample_sraid, string_sra]) - scatter (eachsra in s_sraid) { - call sra.fastqdump { - input : - sra_id=eachsra, - cloud=false - } - } # end scatter each sra - - Array[File] sample_srafile = flatten(fastqdump.fastqfile) - } # end if sample_sraid - - # Generating INDEX files - #1. Bowtie INDEX files if not provided - if ( !defined(bowtie_index) ) { - # create bowtie index when not provided - call bowtie.index as bowtie_idx { - input : - reference=reference - } - } - #2. 
Make sure indexes are six else build indexes - if ( defined(bowtie_index) ) { - # check total number of bowtie indexes provided - Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality - Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) - if ( length(int_bowtie_index) != 6 ) { - # create bowtie index if 6 index files aren't provided - call bowtie.index as bowtie_idx_2 { - input : - reference=reference - } - } - } - Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) - - # Spike-in DNA - #3. Bowtie INDEX files if not provided - String string_spikein = "1" - Array[String] string_spikein_buffer = [1] - if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { - # create bowtie index on spikein genome - call bowtie.index as spikein_bowtie_idx { - input : - reference=select_first([spikein_reference, string_spikein]) - } - } - - #4. Make sure indexes are six else build indexes for Spike-in DNA - if ( defined(spikein_bowtie_index) ) { - # check total number of bowtie indexes provided - Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) - if ( length(int_spikein_bowtie_index) != 6 ) { - # create bowtie index if 6 index files aren't provided - call bowtie.index as spikein_bowtie_idx_2 { - input : - reference=select_first([spikein_reference, string_spikein]) - } - } - } - Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) - - # FASTA faidx and chromsizes and effective genome size - call samtools.faidx as samtools_faidx { - # create FASTA index and chrom sizes files - input : - reference=reference - } - call util.effective_genome_size as egs { - # effective genome size for FASTA - input : - reference=reference - } - - # Process FASTQs - if ( defined(sample_fastq) ) { - - Array[String] string_fastq = [1] #buffer to allow for fastq optionality - Array[File] s_fastq = select_first([sample_fastq, string_fastq]) - - Array[File] sample_fastqfile = s_fastq - } - Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) - -### ------------------------------------------------- ### -### ---------------- S E C T I O N 1 ---------------- ### -### ----------- B: remove Spike-IN reads ------------ ### -### ------------------------------------------------- ### - - # if multiple fastqfiles are provided - Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false - Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false - - if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { - scatter (eachfastq in original_fastqfiles) { - call fastqc.fastqc as spikein_indv_fastqc { - input : - inputfile=eachfastq, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' - } - call util.basicfastqstats as spikein_indv_bfs { - input : - fastqfile=eachfastq, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' - } - call bowtie.spikein_SE as spikein_indv_map { - input : - fastqfile=eachfastq, - index_files=actual_spikein_bowtie_index, - 
metricsfile=spikein_indv_bfs.metrics_out, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' - } - } - - Array[File] spikein_fastqfiles = spikein_indv_map.unaligned - } - Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) - -### ------------------------------------------------- ### -### ---------------- S E C T I O N 2 ---------------- ### -### ---- A: analysis if multiple FASTQs provided ---- ### -### ------------------------------------------------- ### - - if ( multi_fastq ) { - scatter (eachfastq in fastqfiles) { - # Execute analysis on each fastq file provided - # Analysis executed: - # FastQC - # FASTQ read length distribution - # Reference Alignment using Bowtie (-k2 -m2) - # Convert SAM to BAM - # FastQC on BAM files - # Remove Blacklists (if provided) - # Remove read duplicates - # Summary statistics on FASTQs - # Combine html files into one for easy viewing - - call fastqc.fastqc as indv_fastqc { - input : - inputfile=eachfastq, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call util.basicfastqstats as indv_bfs { - input : - fastqfile=eachfastq, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - - call mapping.mapping as indv_mapping { - input : - fastqfile=eachfastq, - index_files=actual_bowtie_index, - metricsfile=indv_bfs.metrics_out, - blacklist=blacklist, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' - } - - call fastqc.fastqc as indv_bamfqc { - input : - inputfile=indv_mapping.sorted_bam, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call runspp.runspp as indv_runspp { - input: - bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) - } - - call bedtools.bamtobed as indv_bamtobed { - input: - bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) - } - - call util.evalstats as indv_summarystats { - input: - fastq_type="SEAseq Sample FASTQ", - bambed=indv_bamtobed.bedfile, - sppfile=indv_runspp.spp_out, - fastqczip=indv_fastqc.zipfile, - bamflag=indv_mapping.bam_stats, - rmdupflag=indv_mapping.mkdup_stats, - bkflag=indv_mapping.bklist_stats, - fastqmetrics=indv_bfs.metrics_out, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - } # end scatter (for each sample fastq) - - # MERGE BAM FILES - # Execute analysis on merge bam file - # Analysis executed: - # Merge BAM (if more than 1 fastq is provided) - # FastQC on Merge BAM (AllMerge__mapped) - - # merge bam files and perform fasTQC if more than one is provided - call util.mergehtml { - input: - htmlfiles=indv_summarystats.xhtml, - txtfiles=indv_summarystats.textfile, - default_location='SAMPLE', - outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' - } - - call samtools.mergebam { - input: - bamfiles=indv_mapping.sorted_bam, - metricsfiles=indv_bfs.metrics_out, - default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', - outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' - } - - call fastqc.fastqc as mergebamfqc { - input: - 
inputfile=mergebam.mergebam, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' - } - - call samtools.indexstats as mergeindexstats { - input: - bamfile=mergebam.mergebam, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' - } - - if ( defined(blacklist) ) { - # remove blacklist regions - String string_blacklist = "" #buffer to allow for blacklist optionality - File blacklist_file = select_first([blacklist, string_blacklist]) - call bedtools.intersect as merge_rmblklist { - input : - fileA=mergebam.mergebam, - fileB=blacklist_file, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', - nooverlap=true - } - call samtools.indexstats as merge_bklist { - input : - bamfile=merge_rmblklist.intersect_out, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' - } - } # end if blacklist provided - - File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) - - call samtools.markdup as merge_markdup { - input : - bamfile=mergebam_afterbklist, - default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' - } - - call samtools.indexstats as merge_mkdup { - input : - bamfile=merge_markdup.mkdupbam, - default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' - } - } # end if length(fastqfiles) > 1: multi_fastq - -### ---------------------------------------- ### -### ------------ S E C T I O N 2 ----------- ### -### -- B: analysis if one FASTQ provided --- ### -### ---------------------------------------- ### - - # if only one fastqfile is provided - if ( one_fastq ) { - # Execute analysis on each fastq file provided - # Analysis executed: - # FastQC - # FASTQ read length distribution - # Reference Alignment using Bowtie (-k2 -m2) - # Convert SAM to BAM - # FastQC on BAM files - # Remove Blacklists (if provided) - # Remove read duplicates - # Summary statistics on FASTQs - # Combine html files into one for easy viewing - - call fastqc.fastqc as uno_fastqc { - input : - inputfile=fastqfiles[0], - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call util.basicfastqstats as uno_bfs { - input : - fastqfile=fastqfiles[0], - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - - call mapping.mapping { - input : - fastqfile=fastqfiles[0], - index_files=actual_bowtie_index, - metricsfile=uno_bfs.metrics_out, - blacklist=blacklist, - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' - } - - call fastqc.fastqc as uno_bamfqc { - input : - inputfile=mapping.sorted_bam, - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call runspp.runspp as uno_runspp { - input: - bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) - } - - call bedtools.bamtobed as uno_bamtobed { - input: - bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) - } - } # end if length(fastqfiles) == 1: one_fastq - -### ---------------------------------------- ### -### ------------ S E C T I O N 3 ----------- ### -### ----------- ChIP-seq analysis ---------- ### -### ---------------------------------------- ### - - # ChIP-seq and downstream analysis - # Execute analysis on merge bam file - # Analysis executed: - # FIRST: Check if reads are mapped - # Peaks identification (SICER, MACS, ROSE) - # Motif analysis - # Complete Summary statistics - - #collate 
correct files for downstream analysis - File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) - - call macs.macs { - input : - bamfile=sample_bam, - pvalue="1e-9", - keep_dup="auto", - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' - } - - call util.addreadme { - input : - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' - } - - call macs.macs as all { - input : - bamfile=sample_bam, - pvalue="1e-9", - keep_dup="all", - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' - } - - call macs.macs as nomodel { - input : - bamfile=sample_bam, - nomodel=true, - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' - } - - call bamtogff.bamtogff { - input : - gtffile=gtf, - chromsizes=samtools_faidx.chromsizes, - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), - bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' - } - - call bedtools.bamtobed as forsicerbed { - input : - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) - } - - call sicer.sicer { - input : - bedfile=forsicerbed.bedfile, - chromsizes=samtools_faidx.chromsizes, - genome_fraction=egs.genomefraction, - fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' - } - - call rose.rose { - input : - gtffile=gtf, - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), - bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), - bedfile_auto=macs.peakbedfile, - bedfile_all=all.peakbedfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' - } - - call runspp.runspp { - input: - bamfile=sample_bam - } - - call util.peaksanno { - input : - gtffile=gtf, - bedfile=macs.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=macs.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as all_peaksanno { - input : - gtffile=gtf, - bedfile=all.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=all.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as nomodel_peaksanno { - input : - gtffile=gtf, - bedfile=nomodel.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=nomodel.summitsfile, - 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as sicer_peaksanno { - input : - gtffile=gtf, - bedfile=sicer.scoreisland, - chromsizes=samtools_faidx.chromsizes, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' - } - - # Motif Analysis - if (run_motifs) { - call motifs.motifs { - input: - reference=reference, - reference_index=samtools_faidx.faidx_file, - bedfile=macs.peakbedfile, - motif_databases=motif_databases, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - - call util.flankbed { - input : - bedfile=macs.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - - call motifs.motifs as flank { - input: - reference=reference, - reference_index=samtools_faidx.faidx_file, - bedfile=flankbed.flankbedfile, - motif_databases=motif_databases, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - } - - call viz.visualization { - input: - wigfile=macs.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=macs.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as vizall { - input: - wigfile=all.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=all.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as viznomodel { - input: - wigfile=nomodel.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=nomodel.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as vizsicer { - input: - wigfile=sicer.wigfile, - chromsizes=samtools_faidx.chromsizes, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' - } - - call bedtools.bamtobed as finalbed { - input: - bamfile=sample_bam - } - - call sortbed.sortbed { - input: - bedfile=finalbed.bedfile - } - - call bedtools.intersect { - input: - fileA=macs.peakbedfile, - fileB=sortbed.sortbed_out, - countoverlap=true, - sorted=true - } - -### ---------------------------------------- ### -### ------------ S E C T I O N 4 ----------- ### -### ---------- Summary Statistics ---------- ### -### ---------------------------------------- ### - - String string_qual = "" #buffer to allow for optionality in if statement - - #SUMMARY STATISTICS - if ( one_fastq ) { - call util.evalstats as uno_summarystats { - # SUMMARY STATISTICS of sample file (only 1 sample file provided) - input: - fastq_type="SEAseq Sample FASTQ", - bambed=finalbed.bedfile, - sppfile=runspp.spp_out, - fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), - bamflag=mapping.bam_stats, - rmdupflag=mapping.mkdup_stats, - bkflag=mapping.bklist_stats, - fastqmetrics=uno_bfs.metrics_out, - countsfile=intersect.intersect_out, - peaksxls=macs.peakxlsfile, - enhancers=rose.enhancers, - superenhancers=rose.super_enhancers, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' - } - - call util.summaryreport as uno_overallsummary { - # Presenting all quality stats for the analysis - input: - 
overallqc_html=uno_summarystats.xhtml, - overallqc_txt=uno_summarystats.textfile - } - } # end if one_fastq - - if ( multi_fastq ) { - call util.evalstats as merge_summarystats { - # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) - input: - fastq_type="SEAseq Comprehensive", - bambed=finalbed.bedfile, - sppfile=runspp.spp_out, - fastqczip=select_first([mergebamfqc.zipfile, string_qual]), - bamflag=mergeindexstats.flagstats, - rmdupflag=merge_mkdup.flagstats, - bkflag=merge_bklist.flagstats, - countsfile=intersect.intersect_out, - peaksxls=macs.peakxlsfile, - enhancers=rose.enhancers, - superenhancers=rose.super_enhancers, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' - } - - call util.summaryreport as merge_overallsummary { - # Presenting all quality stats for the analysis - input: - sampleqc_html=mergehtml.xhtml, - overallqc_html=merge_summarystats.xhtml, - sampleqc_txt=mergehtml.mergetxt, - overallqc_txt=merge_summarystats.textfile - } - } # end if multi_fastq - - output { - #SPIKE-IN - Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile - Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile - Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output - - #FASTQC - Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile - Array[File?]? indv_s_zipfile = indv_fastqc.zipfile - Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile - Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile - - File? s_mergebam_htmlfile = mergebamfqc.htmlfile - File? s_mergebam_zipfile = mergebamfqc.zipfile - - File? uno_s_htmlfile = uno_fastqc.htmlfile - File? uno_s_zipfile = uno_fastqc.zipfile - File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile - File? uno_s_bam_zipfile = uno_bamfqc.zipfile - - #BASICMETRICS - Array[File?]? s_metrics_out = indv_bfs.metrics_out - File? uno_s_metrics_out = uno_bfs.metrics_out - - #BAMFILES - Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam - Array[File?]? indv_s_indexbam = indv_mapping.bam_index - Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam - Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index - Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam - Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index - - File? uno_s_sortedbam = mapping.sorted_bam - File? uno_s_indexstatsbam = mapping.bam_index - File? uno_s_bkbam = mapping.bklist_bam - File? uno_s_bkindexbam = mapping.bklist_index - File? uno_s_rmbam = mapping.mkdup_bam - File? uno_s_rmindexbam = mapping.mkdup_index - - File? s_mergebamfile = mergebam.mergebam - File? s_mergebamindex = mergeindexstats.indexbam - File? s_bkbam = merge_rmblklist.intersect_out - File? s_bkindexbam = merge_bklist.indexbam - File? s_rmbam = merge_markdup.mkdupbam - File? s_rmindexbam = merge_mkdup.indexbam - - #MACS - File? peakbedfile = macs.peakbedfile - File? peakxlsfile = macs.peakxlsfile - File? summitsfile = macs.summitsfile - File? negativexlsfile = macs.negativepeaks - File? wigfile = macs.wigfile - File? all_peakbedfile = all.peakbedfile - File? all_peakxlsfile = all.peakxlsfile - File? all_summitsfile = all.summitsfile - File? all_wigfile = all.wigfile - File? all_negativexlsfile = all.negativepeaks - File? nm_peakbedfile = nomodel.peakbedfile - File? nm_peakxlsfile = nomodel.peakxlsfile - File? nm_summitsfile = nomodel.summitsfile - File? nm_wigfile = nomodel.wigfile - File? nm_negativexlsfile = nomodel.negativepeaks - File? readme_peaks = addreadme.readme_peaks - - #SICER - File? 
scoreisland = sicer.scoreisland - File? sicer_wigfile = sicer.wigfile - - #ROSE - File? pngfile = rose.pngfile - File? mapped_union = rose.mapped_union - File? mapped_stitch = rose.mapped_stitch - File? enhancers = rose.enhancers - File? super_enhancers = rose.super_enhancers - File? gff_file = rose.gff_file - File? gff_union = rose.gff_union - File? union_enhancers = rose.union_enhancers - File? stitch_enhancers = rose.stitch_enhancers - File? e_to_g_enhancers = rose.e_to_g_enhancers - File? g_to_e_enhancers = rose.g_to_e_enhancers - File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers - File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers - File? supergenes = rose.super_genes - File? allgenes = rose.all_genes - - #MOTIFS - File? flankbedfile = flankbed.flankbedfile - - File? ame_tsv = motifs.ame_tsv - File? ame_html = motifs.ame_html - File? ame_seq = motifs.ame_seq - File? meme = motifs.meme_out - File? meme_summary = motifs.meme_summary - - File? summit_ame_tsv = flank.ame_tsv - File? summit_ame_html = flank.ame_html - File? summit_ame_seq = flank.ame_seq - File? summit_meme = flank.meme_out - File? summit_meme_summary = flank.meme_summary - - #BAM2GFF - File? s_matrices = bamtogff.s_matrices - File? densityplot = bamtogff.densityplot - File? pdf_gene = bamtogff.pdf_gene - File? pdf_h_gene = bamtogff.pdf_h_gene - File? png_h_gene = bamtogff.png_h_gene - File? jpg_h_gene = bamtogff.jpg_h_gene - File? pdf_promoters = bamtogff.pdf_promoters - File? pdf_h_promoters = bamtogff.pdf_h_promoters - File? png_h_promoters = bamtogff.png_h_promoters - File? jpg_h_promoters = bamtogff.jpg_h_promoters - - #PEAKS-ANNOTATION - File? peak_promoters = peaksanno.peak_promoters - File? peak_genebody = peaksanno.peak_genebody - File? peak_window = peaksanno.peak_window - File? peak_closest = peaksanno.peak_closest - File? peak_comparison = peaksanno.peak_comparison - File? gene_comparison = peaksanno.gene_comparison - File? pdf_comparison = peaksanno.pdf_comparison - - File? all_peak_promoters = all_peaksanno.peak_promoters - File? all_peak_genebody = all_peaksanno.peak_genebody - File? all_peak_window = all_peaksanno.peak_window - File? all_peak_closest = all_peaksanno.peak_closest - File? all_peak_comparison = all_peaksanno.peak_comparison - File? all_gene_comparison = all_peaksanno.gene_comparison - File? all_pdf_comparison = all_peaksanno.pdf_comparison - - File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters - File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody - File? nomodel_peak_window = nomodel_peaksanno.peak_window - File? nomodel_peak_closest = nomodel_peaksanno.peak_closest - File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison - File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison - File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison - - File? sicer_peak_promoters = sicer_peaksanno.peak_promoters - File? sicer_peak_genebody = sicer_peaksanno.peak_genebody - File? sicer_peak_window = sicer_peaksanno.peak_window - File? sicer_peak_closest = sicer_peaksanno.peak_closest - File? sicer_peak_comparison = sicer_peaksanno.peak_comparison - File? sicer_gene_comparison = sicer_peaksanno.gene_comparison - File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison - - #VISUALIZATION - File? bigwig = visualization.bigwig - File? norm_wig = visualization.norm_wig - File? tdffile = visualization.tdffile - File? n_bigwig = viznomodel.bigwig - File? n_norm_wig = viznomodel.norm_wig - File? n_tdffile = viznomodel.tdffile - File? 
a_bigwig = vizall.bigwig - File? a_norm_wig = vizall.norm_wig - File? a_tdffile = vizall.tdffile - - File? s_bigwig = vizsicer.bigwig - File? s_norm_wig = vizsicer.norm_wig - File? s_tdffile = vizsicer.tdffile - - #QC-STATS - Array[File?]? s_qc_statsfile = indv_summarystats.statsfile - Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile - Array[File?]? s_qc_textfile = indv_summarystats.textfile - File? s_qc_mergehtml = mergehtml.mergefile - - File? s_uno_statsfile = uno_summarystats.statsfile - File? s_uno_htmlfile = uno_summarystats.htmlfile - File? s_uno_textfile = uno_summarystats.textfile - - File? statsfile = merge_summarystats.statsfile - File? htmlfile = merge_summarystats.htmlfile - File? textfile = merge_summarystats.textfile - - File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) - File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) - } -} \ No newline at end of file diff --git a/backup/wdl-format-old/CHANGELOG.md b/backup/wdl-format-old/CHANGELOG.md deleted file mode 100644 index 3eeeadd81..000000000 --- a/backup/wdl-format-old/CHANGELOG.md +++ /dev/null @@ -1,12 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## Unreleased - -### Added - -* Added the `wdl-format` crate for formatting WDL documents ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)). diff --git a/backup/wdl-format-old/Cargo.toml b/backup/wdl-format-old/Cargo.toml deleted file mode 100644 index eec06496c..000000000 --- a/backup/wdl-format-old/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -name = "wdl-format" -version = "0.1.0" -license.workspace = true -edition.workspace = true -authors.workspace = true -homepage.workspace = true -repository.workspace = true - -[dependencies] -anyhow.workspace = true -wdl-ast = { path = "../wdl-ast", version = "0.5.0" } -wdl-grammar = { version = "0.6.0", path = "../wdl-grammar" } - -[dev-dependencies] -pretty_assertions = { workspace = true } -approx = { workspace = true } -rayon = { workspace = true } -colored = { workspace = true } -codespan-reporting = { workspace = true } - -[features] -codespan = ["wdl-ast/codespan"] - -[[test]] -name = "format" -required-features = ["codespan"] -harness = false diff --git a/backup/wdl-format-old/src/formatter.rs b/backup/wdl-format-old/src/formatter.rs deleted file mode 100644 index 6ef467b44..000000000 --- a/backup/wdl-format-old/src/formatter.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! Contains the `Formatter` struct, which is used to keep track of the -//! current formatting state. This includes the current indentation level and -//! whether the current line has been interrupted by comments. -//! The state becomes "interrupted" by comments when a comment forces a newline -//! where it would otherwise not be expected. In this case, the next line(s) -//! will be indented by one level. - -use crate::Formattable; -use crate::NEWLINE; - -/// Space constant used for formatting. -pub const SPACE: &str = " "; -/// Indentation constant used for formatting. Indentation is four spaces -/// per-level. -pub const INDENT: &str = " "; -/// Inline comment space constant used for formatting. -/// -/// Inline comments should start two spaces after the end of the element they -/// are commenting on. 
-pub const INLINE_COMMENT_SPACE: &str = "  ";
-
-/// The `Formatter` struct is used to keep track of the current formatting
-/// state. This includes the current indentation level and whether the current
-/// line has been interrupted by comments.
-#[derive(Debug, Clone, Copy, Default)]
-pub struct Formatter {
-    /// The current indentation level.
-    indent_level: usize,
-    /// Whether the current line has been interrupted by comments.
-    interrupted_by_comments: bool,
-}
-
-impl Formatter {
-    /// Format an element.
-    pub fn format<T: Formattable, F: std::fmt::Write>(
-        mut self,
-        element: &T,
-        writer: &mut F,
-    ) -> std::fmt::Result {
-        element.format(writer, &mut self)
-    }
-
-    /// Add the current indentation to the writer.
-    /// The indentation level will be temporarily increased by one if the
-    /// current line has been interrupted by comments.
-    pub fn indent<T: std::fmt::Write>(&self, writer: &mut T) -> std::fmt::Result {
-        write!(
-            writer,
-            "{}",
-            INDENT.repeat(self.indent_level + (if self.interrupted_by_comments { 1 } else { 0 }))
-        )
-    }
-
-    /// Add a space or an indentation to the writer. If the current line has
-    /// been interrupted by comments, an indentation is added. Otherwise, a
-    /// space is added.
-    pub fn space_or_indent<T: std::fmt::Write>(&mut self, writer: &mut T) -> std::fmt::Result {
-        if !self.interrupted_by_comments {
-            write!(writer, "{}", SPACE)?;
-        } else {
-            self.indent(writer)?;
-        }
-        self.reset_interrupted();
-        Ok(())
-    }
-
-    /// Add a level of indentation.
-    pub fn increment_indent(&mut self) {
-        self.indent_level += 1;
-        self.reset_interrupted();
-    }
-
-    /// Remove a level of indentation.
-    pub fn decrement_indent(&mut self) {
-        self.indent_level = self.indent_level.saturating_sub(1);
-        self.reset_interrupted();
-    }
-
-    /// Check if the current line has been interrupted by comments.
-    pub fn interrupted(&self) -> bool {
-        self.interrupted_by_comments
-    }
-
-    /// Interrupt the current line with comments.
-    pub fn interrupt(&mut self) {
-        self.interrupted_by_comments = true;
-    }
-
-    /// Reset the interrupted state.
-    pub fn reset_interrupted(&mut self) {
-        self.interrupted_by_comments = false;
-    }
-
-    /// Format the comments and blank lines that precede an element.
-    pub fn format_preceding_trivia<F: std::fmt::Write>(
-        &mut self,
-        writer: &mut F,
-        comments: Box<[String]>,
-        would_be_interrupting: bool,
-        respect_blank_lines: bool,
-    ) -> std::fmt::Result {
-        if would_be_interrupting && !comments.is_empty() && !self.interrupted_by_comments {
-            write!(writer, "{}", NEWLINE)?;
-            self.interrupt();
-        }
-        for comment in comments {
-            if !respect_blank_lines && !comment.starts_with('#') {
-                continue;
-            }
-            self.indent(writer)?;
-            write!(writer, "{}{}", comment, NEWLINE)?;
-        }
-        Ok(())
-    }
-
-    /// Format the inline comment of an element, if one is present.
-    pub fn format_inline_comment<F: std::fmt::Write>(
-        &mut self,
-        writer: &mut F,
-        comment: Option<String>,
-        would_be_interrupting: bool,
-    ) -> std::fmt::Result {
-        if let Some(ref comment) = comment {
-            write!(writer, "{}{}{}", INLINE_COMMENT_SPACE, comment, NEWLINE)?;
-        }
-        if would_be_interrupting && comment.is_some() {
-            self.interrupt();
-        } else if !would_be_interrupting && comment.is_none() {
-            write!(writer, "{}", NEWLINE)?;
-        }
-        Ok(())
-    }
-}
diff --git a/backup/wdl-format-old/src/import.rs b/backup/wdl-format-old/src/import.rs
deleted file mode 100644
index 5cc1cb96b..000000000
--- a/backup/wdl-format-old/src/import.rs
+++ /dev/null
@@ -1,174 +0,0 @@
-//! Format import statements.
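The interruption mechanism described above reduces to a small state machine: tokens on an uninterrupted line are separated by a single space, while a comment that forces a newline makes the next token start on a fresh line, indented one level deeper than the current level. A minimal, dependency-free sketch of that behavior (the `Formattable` trait, the wdl-ast types, and the rest of the method set are omitted; `main` exists only to exercise the two writes):

    use std::fmt::Write;

    const SPACE: &str = " ";
    const INDENT: &str = "    "; // four spaces per level, as in the crate

    #[derive(Debug, Clone, Copy, Default)]
    struct Formatter {
        indent_level: usize,
        interrupted_by_comments: bool,
    }

    impl Formatter {
        /// Write the current indentation, one level deeper while interrupted.
        fn indent<W: Write>(&self, w: &mut W) -> std::fmt::Result {
            let extra = usize::from(self.interrupted_by_comments);
            write!(w, "{}", INDENT.repeat(self.indent_level + extra))
        }

        /// Separate tokens with a space, or indent after an interruption.
        fn space_or_indent<W: Write>(&mut self, w: &mut W) -> std::fmt::Result {
            if self.interrupted_by_comments {
                self.indent(w)?;
            } else {
                write!(w, "{}", SPACE)?;
            }
            self.interrupted_by_comments = false; // the state resets after use
            Ok(())
        }
    }

    fn main() -> std::fmt::Result {
        let mut out = String::new();
        let mut f = Formatter { indent_level: 1, ..Default::default() };

        out.write_str("    import")?;
        f.space_or_indent(&mut out)?; // uninterrupted: a single space
        out.write_str("\"a.wdl\"  # comment forces a newline\n")?;

        f.interrupted_by_comments = true;
        f.space_or_indent(&mut out)?; // interrupted: two levels of indent
        out.write_str("as a\n")?;

        print!("{out}");
        Ok(())
    }

Run as written, `as a` lands eight spaces in: one level for the current scope plus one for the interruption, the same "+1 rule" the commented-out `CommandSection` code further below refers to.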
- -use wdl_ast::token_child; -use wdl_ast::v1::AliasKeyword; -use wdl_ast::v1::AsKeyword; -use wdl_ast::v1::ImportAlias; -use wdl_ast::v1::ImportKeyword; -use wdl_ast::v1::ImportStatement; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::Ident; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; -use wdl_grammar::SyntaxExt; - -use crate::Formattable; -use crate::Formatter; - -impl Formattable for ImportKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for AsKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for AliasKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for ImportAlias { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?; - - let alias_keyword = self.alias_keyword(); - formatter.space_or_indent(writer)?; - alias_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, alias_keyword.syntax().inline_comment(), true)?; - - let (source, target) = self.names(); - - formatter.format_preceding_trivia( - writer, - source.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - source.format(writer, formatter)?; - formatter.format_inline_comment(writer, source.syntax().inline_comment(), true)?; - - let as_keyword = self.as_keyword(); - formatter.format_preceding_trivia( - writer, - as_keyword.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - as_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?; - - formatter.format_preceding_trivia( - writer, - target.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - target.format(writer, formatter)?; - - formatter.format_inline_comment(writer, self.syntax().inline_comment(), true) - } -} - -impl Formattable for ImportStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - formatter.format_preceding_trivia( - writer, - self.syntax().preceding_trivia(), - false, - false, - )?; - - let import_keyword = self.keyword(); - import_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, import_keyword.syntax().inline_comment(), true)?; - - let uri = self.uri(); - formatter.format_preceding_trivia(writer, uri.syntax().preceding_trivia(), true, false)?; - formatter.space_or_indent(writer)?; - uri.format(writer, formatter)?; - formatter.format_inline_comment(writer, uri.syntax().inline_comment(), true)?; - - let as_keyword = token_child::(self.syntax()); - if let Some(as_keyword) = as_keyword { - formatter.format_preceding_trivia( - writer, - as_keyword.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - as_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, as_keyword.syntax().inline_comment(), true)?; - - let ident = self - .explicit_namespace() - .expect("import with as clause should have an explicit namespace"); - formatter.format_preceding_trivia( - writer, - ident.syntax().preceding_trivia(), - true, - 
false, - )?; - formatter.space_or_indent(writer)?; - ident.format(writer, formatter)?; - formatter.format_inline_comment(writer, ident.syntax().inline_comment(), true)?; - } - - for alias in self.aliases() { - alias.format(writer, formatter)?; - } - - formatter.format_inline_comment(writer, self.syntax().inline_comment(), false) - } -} - -/// Sorts import statements by their core components. -/// -/// The core components of an import statement are the URI and the namespace. -/// These two elements guarantee a unique import statement. -pub fn sort_imports(a: &ImportStatement, b: &ImportStatement) -> std::cmp::Ordering { - ( - a.uri() - .text() - .expect("import URI cannot have placeholders") - .as_str(), - &a.namespace().expect("import namespace should exist").0, - ) - .cmp(&( - b.uri() - .text() - .expect("import URI cannot have placeholders") - .as_str(), - &b.namespace().expect("import namespace should exist").0, - )) -} diff --git a/backup/wdl-format-old/src/lib.rs b/backup/wdl-format-old/src/lib.rs deleted file mode 100644 index 283a2c77f..000000000 --- a/backup/wdl-format-old/src/lib.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! A library for auto-formatting WDL code. - -#![warn(missing_docs)] -#![warn(rust_2018_idioms)] -#![warn(rust_2021_compatibility)] -#![warn(missing_debug_implementations)] -#![warn(clippy::missing_docs_in_private_items)] -#![warn(rustdoc::broken_intra_doc_links)] - -use anyhow::Result; -use wdl_ast::token_child; -use wdl_ast::v1::VersionKeyword; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::Diagnostic; -use wdl_ast::Document; -use wdl_ast::Ident; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; -use wdl_ast::SyntaxNode; -use wdl_ast::Validator; -use wdl_ast::Version; -use wdl_ast::VersionStatement; -use wdl_grammar::SyntaxExt; - -mod formatter; -mod import; -// mod metadata; -mod task; -mod v1; -mod workflow; - -use formatter::Formatter; - -/// Newline constant used for formatting on windows platforms. -#[cfg(windows)] -pub const NEWLINE: &str = "\r\n"; -/// Newline constant used for formatting on non-windows platforms. -#[cfg(not(windows))] -pub const NEWLINE: &str = "\n"; -/// String terminator constant used for formatting. -const STRING_TERMINATOR: char = '"'; -/// Lint directive prefix constant used for formatting. -const LINT_DIRECTIVE_PREFIX: &str = "#@"; - -/// A trait for elements that can be formatted. -pub trait Formattable { - /// Format the element and write it to the writer. 
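The `(URI, namespace)` key that `sort_imports` compares is exactly the ordering visible in the `seaseq-case/source.formatted.wdl` fixture earlier in this patch, and it is what `Document::format` below uses to emit imports in a stable order. Stripped of the wdl-ast accessors, the comparison is a plain tuple `cmp`; a self-contained sketch, where the `Import` struct is a stand-in for `ImportStatement` and the namespace is stored explicitly rather than derived from the file stem or an `as` clause:

    use std::cmp::Ordering;

    #[derive(Debug)]
    struct Import {
        uri: String,
        namespace: String,
    }

    /// Order imports by (URI, namespace), a unique key per statement.
    fn sort_imports(a: &Import, b: &Import) -> Ordering {
        (a.uri.as_str(), a.namespace.as_str()).cmp(&(b.uri.as_str(), b.namespace.as_str()))
    }

    fn main() {
        let mut imports = vec![
            Import { uri: "workflows/tasks/fastqc.wdl".into(), namespace: "fastqc".into() },
            Import { uri: "workflows/tasks/bedtools.wdl".into(), namespace: "bedtools".into() },
            Import { uri: "workflows/tasks/seaseq_util.wdl".into(), namespace: "util".into() },
        ];
        imports.sort_by(sort_imports);
        for import in &imports {
            println!("import \"{}\" as {}", import.uri, import.namespace);
        }
        // Prints bedtools.wdl, then fastqc.wdl, then seaseq_util.wdl --
        // the same order the formatted fixture shows.
    }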
- fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result; -} - -impl Formattable for VersionKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for Version { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for VersionStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - let mut preamble_comments = Vec::new(); - let mut lint_directives = Vec::new(); - - for comment in self.syntax().preceding_trivia() { - if comment.starts_with(LINT_DIRECTIVE_PREFIX) { - lint_directives.push(comment); - } else if comment.starts_with('#') { - preamble_comments.push(comment); - } // else is just a newline - } - - for comment in preamble_comments.iter() { - write!(writer, "{}{}", comment, NEWLINE)?; - } - - // If there are preamble comments, ensure a blank line is inserted - if !preamble_comments.is_empty() { - write!(writer, "{}", NEWLINE)?; - } - - for comment in lint_directives.iter() { - write!(writer, "{}{}", comment, NEWLINE)?; - } - - let version_keyword = self.keyword(); - version_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, version_keyword.syntax().inline_comment(), true)?; - - let version = self.version(); - formatter.format_preceding_trivia( - writer, - version.syntax().preceding_trivia(), - true, - false, - )?; - formatter.space_or_indent(writer)?; - version.format(writer, formatter)?; - formatter.format_inline_comment(writer, self.syntax().inline_comment(), false) - } -} - -impl Formattable for Ident { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for Document { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - let ast = self.ast(); - let ast = ast.as_v1().expect("document should be a v1 document"); - let version_statement = self - .version_statement() - .expect("document should have a version statement"); - version_statement.format(writer, formatter)?; - let mut imports = ast.imports().collect::>(); - if !imports.is_empty() { - write!(writer, "{}", NEWLINE)?; - } - imports.sort_by(import::sort_imports); - for import in imports { - import.format(writer, formatter)?; - } - for item in ast.items() { - if item.syntax().kind() == SyntaxKind::ImportStatementNode { - continue; - } - // write!(writer, "{}", NEWLINE)?; - // item.format(writer, formatter)?; - } - Ok(()) - } -} - -/// Format a WDL document. -pub fn format_document(code: &str) -> Result> { - let (document, diagnostics) = Document::parse(code); - if !diagnostics.is_empty() { - return Err(diagnostics); - } - let mut validator = Validator::default(); - match validator.validate(&document) { - std::result::Result::Ok(_) => { - // The document is valid, so we can format it. 
- } - Err(diagnostics) => return Err(diagnostics), - } - - let mut result = String::new(); - let formatter = &mut Formatter::default(); - - match formatter.format(&document, &mut result) { - Ok(_) => {} - Err(error) => { - let msg = format!("Failed to format document: {}", error); - return Err(vec![Diagnostic::error(msg)]); - } - } - - Ok(result) -} diff --git a/backup/wdl-format-old/src/metadata.rs b/backup/wdl-format-old/src/metadata.rs deleted file mode 100644 index 9bb557f29..000000000 --- a/backup/wdl-format-old/src/metadata.rs +++ /dev/null @@ -1,365 +0,0 @@ -//! A module for formatting metadata sections (meta and parameter_meta). - -use wdl_ast::v1::LiteralNull; -use wdl_ast::v1::MetadataArray; -use wdl_ast::v1::MetadataObject; -use wdl_ast::v1::MetadataObjectItem; -use wdl_ast::v1::MetadataSection; -use wdl_ast::v1::MetadataValue; -use wdl_ast::v1::ParameterMetadataSection; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; - -use super::comments::format_inline_comment; -use super::comments::format_preceding_comments; -use super::first_child_of_kind; -use super::format_element_with_comments; -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::LinePosition; -use super::NEWLINE; - -impl Formattable for LiteralNull { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) - } -} - -impl Formattable for MetadataObject { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - format_element_with_comments( - &open_brace, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - let mut commas = self - .syntax() - .children_with_tokens() - .filter(|c| c.kind() == SyntaxKind::Comma); - - for item in self.items() { - item.format(writer, formatter)?; - if let Some(cur_comma) = commas.next() { - format_element_with_comments( - &cur_comma, - writer, - formatter, - LinePosition::End, - |_, _| Ok(()), - )?; - } else { - // No trailing comma was in the input - write!(writer, ",")?; - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - format_preceding_comments(&close_brace, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_brace)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataArray { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let open_bracket = first_child_of_kind(self.syntax(), SyntaxKind::OpenBracket); - format_element_with_comments( - &open_bracket, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - let mut commas = self - .syntax() - .children_with_tokens() - 
.filter(|c| c.kind() == SyntaxKind::Comma); - - for item in self.elements() { - formatter.indent(writer)?; - item.format(writer, formatter)?; - if let Some(cur_comma) = commas.next() { - format_element_with_comments( - &cur_comma, - writer, - formatter, - LinePosition::End, - |_, _| Ok(()), - )?; - } else { - // No trailing comma was in the input - write!(writer, ",")?; - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_bracket = first_child_of_kind(self.syntax(), SyntaxKind::CloseBracket); - format_preceding_comments(&close_bracket, writer, formatter, false)?; - formatter.indent(writer)?; - write!(writer, "{}", close_bracket)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataValue { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - MetadataValue::String(s) => s.format(writer, formatter), - MetadataValue::Boolean(b) => b.format(writer, formatter), - MetadataValue::Float(f) => f.format(writer, formatter), - MetadataValue::Integer(i) => i.format(writer, formatter), - MetadataValue::Null(n) => n.format(writer, formatter), - MetadataValue::Object(o) => o.format(writer, formatter), - MetadataValue::Array(a) => a.format(writer, formatter), - } - } -} - -impl Formattable for MetadataObjectItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let name = self.name(); - formatter.indent(writer)?; - name.format(writer, formatter)?; - format_inline_comment( - &SyntaxElement::from(name.syntax().clone()), - writer, - formatter, - true, - )?; - - let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - format_element_with_comments( - &colon, - writer, - formatter, - LinePosition::Middle, - |writer, formatter| { - if formatter.interrupted() { - formatter.indent(writer)?; - formatter.reset_interrupted(); - } - Ok(()) - }, - )?; - - let value = self.value(); - format_preceding_comments( - &SyntaxElement::from(value.syntax().clone()), - writer, - formatter, - true, - )?; - formatter.space_or_indent(writer)?; - value.format(writer, formatter)?; - format_inline_comment( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - true, - ) - } -} - -impl Formattable for MetadataSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - format_preceding_comments( - &SyntaxElement::from(self.syntax().clone()), - writer, - formatter, - false, - )?; - - let meta_keyword = first_child_of_kind(self.syntax(), SyntaxKind::MetaKeyword); - formatter.indent(writer)?; - write!(writer, "{}", meta_keyword)?; - format_inline_comment(&meta_keyword, writer, formatter, true)?; - - let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - format_element_with_comments( - &open_brace, - writer, - formatter, - LinePosition::End, - |writer, formatter| { - if formatter.interrupted() { - formatter.reset_interrupted(); - formatter.indent(writer)?; - } else { - write!(writer, "{}", SPACE)?; - } - Ok(()) - }, - )?; - - formatter.increment_indent(); - - for item in self.items() { - item.format(writer, formatter)?; - if formatter.interrupted() { - formatter.reset_interrupted(); - } else { - write!(writer, "{}", NEWLINE)?; - } - } - - formatter.decrement_indent(); - - let close_brace = 
first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
-        format_preceding_comments(&close_brace, writer, formatter, false)?;
-        formatter.indent(writer)?;
-        write!(writer, "{}", close_brace)?;
-        format_inline_comment(
-            &SyntaxElement::from(self.syntax().clone()),
-            writer,
-            formatter,
-            false,
-        )
-    }
-}
-
-impl Formattable for ParameterMetadataSection {
-    fn format<T: std::fmt::Write>(
-        &self,
-        writer: &mut T,
-        formatter: &mut Formatter,
-    ) -> std::fmt::Result {
-        format_preceding_comments(
-            &SyntaxElement::from(self.syntax().clone()),
-            writer,
-            formatter,
-            false,
-        )?;
-
-        let parameter_meta_keyword =
-            first_child_of_kind(self.syntax(), SyntaxKind::ParameterMetaKeyword);
-        formatter.indent(writer)?;
-        write!(writer, "{}", parameter_meta_keyword)?;
-        format_inline_comment(&parameter_meta_keyword, writer, formatter, true)?;
-
-        let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace);
-        format_element_with_comments(
-            &open_brace,
-            writer,
-            formatter,
-            LinePosition::End,
-            |writer, formatter| {
-                if formatter.interrupted() {
-                    formatter.reset_interrupted();
-                    formatter.indent(writer)?;
-                } else {
-                    write!(writer, "{}", SPACE)?;
-                }
-                Ok(())
-            },
-        )?;
-
-        formatter.increment_indent();
-
-        for item in self.items() {
-            item.format(writer, formatter)?;
-            if formatter.interrupted() {
-                formatter.reset_interrupted();
-            } else {
-                write!(writer, "{}", NEWLINE)?;
-            }
-        }
-
-        formatter.decrement_indent();
-
-        let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace);
-        format_preceding_comments(&close_brace, writer, formatter, false)?;
-        formatter.indent(writer)?;
-        write!(writer, "{}", close_brace)?;
-        format_inline_comment(
-            &SyntaxElement::from(self.syntax().clone()),
-            writer,
-            formatter,
-            false,
-        )
-    }
-}
diff --git a/backup/wdl-format-old/src/task.rs b/backup/wdl-format-old/src/task.rs
deleted file mode 100644
index 6e01b8843..000000000
--- a/backup/wdl-format-old/src/task.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-//! A module for formatting elements in tasks.
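-
-// NOTE: the impls in this module (and in `metadata.rs` above) target the
-// `Formattable` trait defined in `lib.rs`, which is outside this hunk. A
-// minimal sketch of the shape the call sites below assume -- the exact
-// bounds in `lib.rs` may differ:
-//
-// pub trait Formattable {
-//     fn format<T: std::fmt::Write>(
-//         &self,
-//         writer: &mut T,
-//         formatter: &mut Formatter,
-//     ) -> std::fmt::Result;
-// }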
- -use wdl_ast::v1::CommandPart; -use wdl_ast::v1::CommandSection; -use wdl_ast::v1::CommandText; -use wdl_ast::v1::Decl; -use wdl_ast::v1::RequirementsItem; -use wdl_ast::v1::RequirementsSection; -use wdl_ast::v1::RuntimeItem; -use wdl_ast::v1::RuntimeSection; -use wdl_ast::v1::TaskDefinition; -use wdl_ast::v1::TaskItem; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; - -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::NEWLINE; - -impl Formattable for CommandText { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for CommandSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let command_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::CommandKeyword); formatter.indent(writer)?; - // write!(writer, "{}", command_keyword)?; - // format_inline_comment(&command_keyword, writer, formatter, true)?; - - // // coerce all command sections to use heredoc ('<<<>>>>') syntax - // // (as opposed to bracket ('{}') syntax) - // let open_section = if self.is_heredoc() { - // first_child_of_kind(self.syntax(), SyntaxKind::OpenHeredoc) - // } else { - // first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace) - // }; - // format_preceding_comments(&open_section, writer, formatter, true)?; - - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "<<<")?; - - // for part in self.parts() { - // match part { - // CommandPart::Text(t) => { - // t.format(writer, formatter)?; - // } - // CommandPart::Placeholder(p) => { - // p.format(writer, formatter)?; - // } - // } - // } - - // write!(writer, ">>>")?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for RuntimeItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let name = self.name(); - // formatter.indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for RuntimeSection { - fn 
format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let runtime_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::RuntimeKeyword); formatter.indent(writer)?; - // write!(writer, "{}", runtime_keyword)?; - // format_inline_comment(&runtime_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for item in self.items() { - // item.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, true)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for RequirementsItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let name = self.name(); - // formatter.indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for RequirementsSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let requirements_keyword = - // first_child_of_kind(self.syntax(), SyntaxKind::RequirementsKeyword); - // formatter.indent(writer)?; - // write!(writer, "{}", requirements_keyword)?; - // format_inline_comment(&requirements_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
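- // // (The "+1 rule" is not defined in this file; as used here it appears
- // // to mean that an element which interrupts its line is written one
- // // indent level deeper, which open braces opt out of so that they stay
- // // on the same line as their keyword.)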
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for item in self.items() { - // item.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, true)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for TaskDefinition { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let task_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::TaskKeyword); formatter.indent(writer)?; - // write!(writer, "{}", task_keyword)?; - // format_inline_comment(&task_keyword, writer, formatter, true)?; - - // let name = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // let mut meta_section_str = String::new(); - // let mut parameter_meta_section_str = String::new(); - // let mut input_section_str = String::new(); - // let mut declaration_section_str = String::new(); - // let mut command_section_str = String::new(); - // let mut output_section_str = String::new(); - // let mut runtime_section_str = String::new(); - // let mut hints_section_str = String::new(); - // let mut requirements_section_str = String::new(); - - // for item in self.items() { - // match item { - // TaskItem::Metadata(m) => { - // m.format(&mut meta_section_str, formatter)?; - // } - // TaskItem::ParameterMetadata(pm) => { - // pm.format(&mut parameter_meta_section_str, formatter)?; - // } - // TaskItem::Input(i) => { - // i.format(&mut input_section_str, formatter)?; - // } - // TaskItem::Declaration(d) => { - // Decl::Bound(d).format(&mut declaration_section_str, formatter)?; - // } - // TaskItem::Command(c) => { - // c.format(&mut command_section_str, formatter)?; - // } - // TaskItem::Output(o) => { - // o.format(&mut output_section_str, formatter)?; - // } - // TaskItem::Runtime(r) => { - // r.format(&mut runtime_section_str, formatter)?; - // } - // TaskItem::Hints(h) => { - // h.format(&mut hints_section_str, formatter)?; - // } - // TaskItem::Requirements(r) => { - // r.format(&mut requirements_section_str, formatter)?; - // } - // } - // } - - // let mut first_section = true; - - // if !meta_section_str.is_empty() { - // first_section = false; - // write!(writer, "{}", meta_section_str)?; - // } - // if !parameter_meta_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", parameter_meta_section_str)?; - // } - // if !input_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", input_section_str)?; - // } - // if !declaration_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // first_section = false; - // write!(writer, "{}", declaration_section_str)?; - // } - // // Command section is required - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", command_section_str)?; - // if !output_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", output_section_str)?; - // } - // if !runtime_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", runtime_section_str)?; - // } - // if !hints_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", hints_section_str)?; - // } - // if !requirements_section_str.is_empty() { - // write!(writer, "{}", NEWLINE)?; - // write!(writer, "{}", requirements_section_str)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, true)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} 
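-
-// The commented-out `TaskDefinition` impl above buffers each section into
-// its own `String` and then writes the buffers out in the canonical order
-// (meta, parameter_meta, input, declarations, command, output, runtime,
-// hints, requirements), regardless of their order in the source. A minimal
-// sketch of that pattern (hypothetical helper, not part of this crate):
-//
-// fn write_sections<T: std::fmt::Write>(
-//     writer: &mut T,
-//     sections: &[String], // pre-formatted, in canonical order
-// ) -> std::fmt::Result {
-//     let mut first = true;
-//     for section in sections.iter().filter(|s| !s.is_empty()) {
-//         if !first {
-//             // blank line between sections
-//             write!(writer, "{}", NEWLINE)?;
-//         }
-//         first = false;
-//         write!(writer, "{}", section)?;
-//     }
-//     Ok(())
-// }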
diff --git a/backup/wdl-format-old/src/v1.rs b/backup/wdl-format-old/src/v1.rs deleted file mode 100644 index 561a58ebc..000000000 --- a/backup/wdl-format-old/src/v1.rs +++ /dev/null @@ -1,711 +0,0 @@ -//! A module for formatting WDL v1 elements. - -use std::fmt::Write; - -use wdl_ast::v1::Decl; -use wdl_ast::v1::DefaultOption; -use wdl_ast::v1::DocumentItem; -use wdl_ast::v1::Expr; -use wdl_ast::v1::HintsItem; -use wdl_ast::v1::HintsSection; -use wdl_ast::v1::InputSection; -use wdl_ast::v1::LiteralBoolean; -use wdl_ast::v1::LiteralFloat; -use wdl_ast::v1::LiteralInteger; -use wdl_ast::v1::LiteralString; -use wdl_ast::v1::OutputSection; -use wdl_ast::v1::Placeholder; -use wdl_ast::v1::PlaceholderOption; -use wdl_ast::v1::SepOption; -use wdl_ast::v1::StringPart; -use wdl_ast::v1::StringText; -use wdl_ast::v1::StructDefinition; -use wdl_ast::v1::StructKeyword; -use wdl_ast::v1::TrueFalseOption; -use wdl_ast::v1::Type; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; -use wdl_grammar::SyntaxExt; - -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::NEWLINE; -use super::STRING_TERMINATOR; - -impl Formattable for DefaultOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let default_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); - // format_preceding_comments(&default_word, writer, formatter, true)?; - // write!(writer, "{}", default_word)?; - // format_inline_comment(&default_word, writer, formatter, true)?; - - // let assignment = first_child_of_kind(self.syntax(), SyntaxKind::Assignment); - // format_preceding_comments(&assignment, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // let value = self.value(); - // format_preceding_comments( - // &SyntaxElement::from(value.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // value.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(value.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for SepOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let sep_word = first_child_of_kind(self.syntax(), SyntaxKind::Ident); - // format_preceding_comments(&sep_word, writer, formatter, true)?; - // write!(writer, "{}", sep_word)?; - // format_inline_comment(&sep_word, writer, formatter, true)?; - - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // let separator = self.separator(); - // format_preceding_comments( - // &SyntaxElement::from(separator.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // separator.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(separator.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for TrueFalseOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let mut true_clause = String::new(); - // 
let mut false_clause = String::new(); - // let mut which_clause = None; - // for child in self.syntax().children_with_tokens() { - // match child.kind() { - // SyntaxKind::TrueKeyword => { - // which_clause = Some(true); - - // format_preceding_comments(&child, &mut true_clause, formatter, - // true)?; write!(true_clause, "{}", child)?; - // format_inline_comment(&child, &mut true_clause, formatter, - // true)?; } - // SyntaxKind::FalseKeyword => { - // which_clause = Some(false); - - // format_preceding_comments(&child, &mut false_clause, formatter, - // true)?; write!(false_clause, "{}", child)?; - // format_inline_comment(&child, &mut false_clause, formatter, - // true)?; } - // SyntaxKind::Assignment => { - // let cur_clause = match which_clause { - // Some(true) => &mut true_clause, - // Some(false) => &mut false_clause, - // _ => unreachable!( - // "should have found a true or false keyword before an - // assignment" ), - // }; - - // format_preceding_comments(&child, cur_clause, formatter, true)?; - // formatter.space_or_indent(cur_clause)?; - // write!(cur_clause, "{}", child)?; - // format_inline_comment(&child, cur_clause, formatter, true)?; - // } - // SyntaxKind::LiteralStringNode => { - // let cur_clause = match which_clause { - // Some(true) => &mut true_clause, - // Some(false) => &mut false_clause, - // _ => unreachable!( - // "should have found a true or false keyword before a - // string" ), - // }; - - // format_preceding_comments(&child, cur_clause, formatter, true)?; - // formatter.space_or_indent(cur_clause)?; - // let literal_string = LiteralString::cast( - // child - // .as_node() - // .expect("LiteralStringNode should be a node") - // .clone(), - // ) - // .expect("LiteralStringNode should cast to a LiteralString"); - // literal_string.format(cur_clause, formatter)?; - // format_inline_comment(&child, writer, formatter, true)?; - // } - // SyntaxKind::Whitespace => { - // // Ignore - // } - // SyntaxKind::Comment => { - // // Handled by a call to `format_preceding_comments` - // // or `format_inline_comment` in another match arm. 
- // } - // _ => { - // unreachable!("Unexpected syntax kind: {:?}", child.kind()); - // } - // } - // } - // write!(writer, "{} {}", true_clause, false_clause)?; - - Ok(()) - } -} - -impl Formattable for PlaceholderOption { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - PlaceholderOption::Default(default) => default.format(writer, formatter), - PlaceholderOption::Sep(sep) => sep.format(writer, formatter), - PlaceholderOption::TrueFalse(true_false) => true_false.format(writer, formatter), - } - } -} - -impl Formattable for Placeholder { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // coerce all placeholders into '~{}' placeholders - // (as opposed to '${}' placeholders) - write!(writer, "~{{")?; - - let mut option_present = false; - if let Some(option) = self.options().next() { - option.format(writer, formatter)?; - option_present = true; - } - - let expr = self.expr(); - if option_present { - formatter.space_or_indent(writer)?; - } - expr.format(writer, formatter)?; - - write!(writer, "}}") - } -} - -impl Formattable for StringText { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - let mut iter = self.as_str().chars().peekable(); - let mut prev_c = None; - while let Some(c) = iter.next() { - match c { - '\\' => { - if let Some(next_c) = iter.peek() { - if *next_c == '\'' { - // Do not write this backslash - prev_c = Some(c); - continue; - } - } - writer.write_char(c)?; - } - '"' => { - if let Some(pc) = prev_c { - if pc != '\\' { - writer.write_char('\\')?; - } - } - writer.write_char(c)?; - } - _ => { - writer.write_char(c)?; - } - } - prev_c = Some(c); - } - - Ok(()) - } -} - -impl Formattable for LiteralString { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", STRING_TERMINATOR)?; - for part in self.parts() { - match part { - StringPart::Text(text) => { - text.format(writer, formatter)?; - } - StringPart::Placeholder(placeholder) => { - placeholder.format(writer, formatter)?; - } - } - } - write!(writer, "{}", STRING_TERMINATOR) - } -} - -impl Formattable for LiteralBoolean { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.value()) // TODO - } -} - -impl Formattable for LiteralFloat { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) // TODO - } -} - -impl Formattable for LiteralInteger { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) // TODO - } -} - -impl Formattable for Type { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) // TODO - } -} - -impl Formattable for Expr { - fn format( - &self, - writer: &mut T, - _state: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.syntax()) // TODO - } -} - -impl Formattable for Decl { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let ty = self.ty(); - // formatter.indent(writer)?; - // ty.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(ty.syntax().clone()), - // writer, 
- // formatter, - // true, - // )?; - - // let name = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(expr) = self.expr() { - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // } - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for InputSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let input_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::InputKeyword); formatter.indent(writer)?; - // write!(writer, "{}", input_keyword)?; - // format_inline_comment(&input_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for decl in self.declarations() { - // decl.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for OutputSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let output_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::OutputKeyword); formatter.indent(writer)?; - // write!(writer, "{}", output_keyword)?; - // format_inline_comment(&output_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for decl in self.declarations() { - // Decl::Bound(decl).format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for HintsItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let name = self.name(); - // formatter.indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for HintsSection { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let hints_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::HintsKeyword); formatter.indent(writer)?; - // write!(writer, "{}", hints_keyword)?; - // format_inline_comment(&hints_keyword, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for item in self.items() { - // item.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for StructKeyword { - fn format( - &self, - writer: &mut T, - _formatter: &mut Formatter, - ) -> std::fmt::Result { - write!(writer, "{}", self.as_str()) - } -} - -impl Formattable for StructDefinition { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), false, true)?; - - let struct_keyword = self.keyword(); - struct_keyword.format(writer, formatter)?; - formatter.format_inline_comment(writer, struct_keyword.syntax().inline_comment(), true)?; - - let name = self.name(); - formatter.format_preceding_trivia(writer, self.syntax().preceding_trivia(), true, false)?; - formatter.space_or_indent(writer)?; - name.format(writer, formatter)?; - formatter.format_inline_comment(writer, name.syntax().inline_comment(), true)?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // if let Some(m) = self.metadata().next() { - // m.format(writer, formatter)?; - // write!(writer, "{}", NEWLINE)?; - // } - - // if let Some(pm) = self.parameter_metadata().next() { - // pm.format(writer, formatter)?; - // write!(writer, "{}", NEWLINE)?; - // } - - // for decl in self.members() { - // Decl::Unbound(decl).format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = self - // .syntax() - // .children_with_tokens() - // .find(|element| element.kind() == SyntaxKind::CloseBrace) - // .expect("StructDefinition should have a close brace"); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for DocumentItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - DocumentItem::Import(_) => { - unreachable!("Import statements should not be formatted as a DocumentItem") - } - DocumentItem::Workflow(workflow) => workflow.format(writer, formatter), - DocumentItem::Task(task) => task.format(writer, formatter), - DocumentItem::Struct(structure) => structure.format(writer, formatter), - } - } -} diff --git a/backup/wdl-format-old/src/workflow.rs b/backup/wdl-format-old/src/workflow.rs deleted file mode 100644 index f82e57003..000000000 --- a/backup/wdl-format-old/src/workflow.rs +++ /dev/null @@ -1,666 +0,0 @@ -//! A module for formatting elements in workflows. 
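-
-// NOTE: as in `task.rs`, most `format` bodies below are stubbed to
-// `Ok(())`, with the previous implementation preserved in the commented-out
-// blocks for reference.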
- -use wdl_ast::v1::CallAfter; -use wdl_ast::v1::CallAlias; -use wdl_ast::v1::CallInputItem; -use wdl_ast::v1::CallStatement; -use wdl_ast::v1::ConditionalStatement; -use wdl_ast::v1::Decl; -use wdl_ast::v1::ScatterStatement; -use wdl_ast::v1::WorkflowDefinition; -use wdl_ast::v1::WorkflowItem; -use wdl_ast::v1::WorkflowStatement; -use wdl_ast::AstNode; -use wdl_ast::AstToken; -use wdl_ast::SyntaxElement; -use wdl_ast::SyntaxKind; - -use super::formatter::SPACE; -use super::Formattable; -use super::Formatter; -use super::NEWLINE; - -impl Formattable for CallAlias { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let as_keyword = first_child_of_kind(self.syntax(), SyntaxKind::AsKeyword); - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", as_keyword)?; - // format_inline_comment(&as_keyword, writer, formatter, true)?; - - // let ident = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallAfter { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let after_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::AfterKeyword); formatter.space_or_indent(writer)?; - // write!(writer, "{}", after_keyword)?; - // format_inline_comment(&after_keyword, writer, formatter, true)?; - - // let ident = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallInputItem { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // let name = self.name(); - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(expr) = self.expr() { - // let assignment = first_child_of_kind(self.syntax(), - // SyntaxKind::Assignment); format_preceding_comments(&assignment, - // writer, formatter, true)?; formatter.space_or_indent(writer)?; - // write!(writer, "{}", assignment)?; - // format_inline_comment(&assignment, writer, formatter, true)?; - - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // } - - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // true, - // ) - Ok(()) - } -} - -impl Formattable for CallStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), 
- // writer, - // formatter, - // false, - // )?; - - // let call_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::CallKeyword); formatter.indent(writer)?; - // write!(writer, "{}", call_keyword)?; - // format_inline_comment(&call_keyword, writer, formatter, true)?; - - // let target = self.target(); - // format_preceding_comments( - // &SyntaxElement::Node(target.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", target.syntax())?; - // format_inline_comment( - // &SyntaxElement::Node(target.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // if let Some(alias) = self.alias() { - // alias.format(writer, formatter)?; - // } - - // for after in self.after() { - // after.format(writer, formatter)?; - // } - - // let inputs: Vec<_> = self.inputs().collect(); - // if !inputs.is_empty() { - // let open_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::OpenBrace); format_preceding_comments(&open_brace, - // writer, formatter, true)?; // Open braces should ignore the "+1 - // rule" followed by other interrupted // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, true)?; - - // // TODO consider detecting if document is >= v1.2 and forcing the - // optional input // syntax - // if let Some(input_keyword) = self - // .syntax() - // .children_with_tokens() - // .find(|c| c.kind() == SyntaxKind::InputKeyword) - // { - // format_preceding_comments(&input_keyword, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", input_keyword)?; - // format_inline_comment(&input_keyword, writer, formatter, true)?; - - // let colon = first_child_of_kind(self.syntax(), SyntaxKind::Colon); - // format_preceding_comments(&colon, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", colon)?; - // format_inline_comment(&colon, writer, formatter, true)?; - // } // else v1.2 syntax - - // if inputs.len() == 1 { - // let input = inputs.first().expect("inputs should have a first - // element"); format_preceding_comments( - // &SyntaxElement::from(input.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // input.format(writer, formatter)?; - // // TODO there may be a trailing comma with comments attached to it - - // let close_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::CloseBrace); format_preceding_comments(& - // close_brace, writer, formatter, true)?; formatter. 
- // space_or_indent(writer)?; write!(writer, "{}", close_brace)?; - // } else { - // // multiple inputs - // let mut commas = self - // .syntax() - // .children_with_tokens() - // .filter(|c| c.kind() == SyntaxKind::Comma); - - // formatter.increment_indent(); - - // for input in inputs { - // if !formatter.interrupted() { - // write!(writer, "{}", NEWLINE)?; - // } else { - // formatter.reset_interrupted(); - // } - // format_preceding_comments( - // &SyntaxElement::from(input.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - // formatter.indent(writer)?; - // input.format(writer, formatter)?; - // if let Some(cur_comma) = commas.next() { - // format_preceding_comments(&cur_comma, writer, formatter, - // true)?; write!(writer, ",")?; - // format_inline_comment(&cur_comma, writer, formatter, true)?; - // } else { - // write!(writer, ",")?; - // } - // } - // if !formatter.interrupted() { - // write!(writer, "{}", NEWLINE)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), - // SyntaxKind::CloseBrace); format_preceding_comments(& - // close_brace, writer, formatter, false)?; formatter. - // indent(writer)?; write!(writer, "{}", close_brace)?; - // } - // } - - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for ConditionalStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let if_keyword = first_child_of_kind(self.syntax(), SyntaxKind::IfKeyword); - // formatter.indent(writer)?; - // write!(writer, "{}", if_keyword)?; - // format_inline_comment(&if_keyword, writer, formatter, true)?; - - // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); - // format_preceding_comments(&open_paren, writer, formatter, true)?; - // // Open parens should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_paren)?; - - // let mut paren_on_same_line = true; - // let expr = self.expr(); - // // PERF: This calls `to_string()` which is also called later by `format()` - // // There should be a way to avoid this. 
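- // // One possibility (a sketch, not applied here): render the expression
- // // once and reuse it, e.g.
- // //     let rendered = expr.syntax().to_string();
- // //     let multiline_expr = rendered.contains(NEWLINE);
- // // and later write `rendered` instead of re-stringifying in `format()`.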
- // let multiline_expr = expr.syntax().to_string().contains(NEWLINE); - - // format_inline_comment(&open_paren, writer, formatter, !multiline_expr)?; - // if multiline_expr { - // formatter.increment_indent(); - // paren_on_same_line = false; - // } - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // !multiline_expr, - // )?; - // if formatter.interrupted() || multiline_expr { - // formatter.indent(writer)?; - // paren_on_same_line = false; - // } - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // !multiline_expr, - // )?; - // if formatter.interrupted() { - // paren_on_same_line = false; - // } - - // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); - // format_preceding_comments(&close_paren, writer, formatter, !multiline_expr)?; - // if formatter.interrupted() || !paren_on_same_line { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", close_paren)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. - // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for stmt in self.statements() { - // stmt.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for ScatterStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let scatter_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::ScatterKeyword); formatter.indent(writer)?; - // write!(writer, "{}", scatter_keyword)?; - // format_inline_comment(&scatter_keyword, writer, formatter, true)?; - - // let open_paren = first_child_of_kind(self.syntax(), SyntaxKind::OpenParen); - // format_preceding_comments(&open_paren, writer, formatter, true)?; - // // Open parens should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_paren)?; - // format_inline_comment(&open_paren, writer, formatter, true)?; - - // let ident = self.variable(); - // format_preceding_comments( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // ident.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(ident.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let in_keyword = first_child_of_kind(self.syntax(), SyntaxKind::InKeyword); - // format_preceding_comments(&in_keyword, writer, formatter, true)?; - // formatter.space_or_indent(writer)?; - // write!(writer, "{}", in_keyword)?; - // format_inline_comment(&in_keyword, writer, formatter, true)?; - - // let expr = self.expr(); - // format_preceding_comments( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // expr.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(expr.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let close_paren = first_child_of_kind(self.syntax(), SyntaxKind::CloseParen); - // format_preceding_comments(&close_paren, writer, formatter, true)?; - // if formatter.interrupted() { - // formatter.indent(writer)?; - // } - // write!(writer, "{}", close_paren)?; - // format_inline_comment(&close_paren, writer, formatter, true)?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // for stmt in self.statements() { - // stmt.format(writer, formatter)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} - -impl Formattable for WorkflowStatement { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - match self { - WorkflowStatement::Call(c) => c.format(writer, formatter), - WorkflowStatement::Conditional(c) => c.format(writer, formatter), - WorkflowStatement::Scatter(s) => s.format(writer, formatter), - WorkflowStatement::Declaration(d) => Decl::Bound(d.clone()).format(writer, formatter), - } - } -} - -impl Formattable for WorkflowDefinition { - fn format( - &self, - writer: &mut T, - formatter: &mut Formatter, - ) -> std::fmt::Result { - // format_preceding_comments( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // )?; - - // let workflow_keyword = first_child_of_kind(self.syntax(), - // SyntaxKind::WorkflowKeyword); write!(writer, "{}", - // workflow_keyword)?; format_inline_comment(&workflow_keyword, writer, - // formatter, true)?; - - // let name = self.name(); - // format_preceding_comments( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - // formatter.space_or_indent(writer)?; - // name.format(writer, formatter)?; - // format_inline_comment( - // &SyntaxElement::from(name.syntax().clone()), - // writer, - // formatter, - // true, - // )?; - - // let open_brace = first_child_of_kind(self.syntax(), SyntaxKind::OpenBrace); - // format_preceding_comments(&open_brace, writer, formatter, true)?; - // // Open braces should ignore the "+1 rule" followed by other interrupted - // // elements. 
- // if formatter.interrupted() { - // formatter.reset_interrupted(); - // formatter.indent(writer)?; - // } else { - // write!(writer, "{}", SPACE)?; - // } - // write!(writer, "{}", open_brace)?; - // format_inline_comment(&open_brace, writer, formatter, false)?; - - // formatter.increment_indent(); - - // let mut meta_section_str = String::new(); - // let mut parameter_meta_section_str = String::new(); - // let mut input_section_str = String::new(); - // let mut body_str = String::new(); - // let mut output_section_str = String::new(); - // let mut hints_section_str = String::new(); - - // for item in self.items() { - // match item { - // WorkflowItem::Metadata(m) => { - // m.format(&mut meta_section_str, formatter)?; - // } - // WorkflowItem::ParameterMetadata(pm) => { - // pm.format(&mut parameter_meta_section_str, formatter)?; - // } - // WorkflowItem::Input(i) => { - // i.format(&mut input_section_str, formatter)?; - // } - // WorkflowItem::Call(c) => { - // c.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Conditional(c) => { - // c.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Scatter(s) => { - // s.format(&mut body_str, formatter)?; - // } - // WorkflowItem::Declaration(d) => { - // Decl::Bound(d).format(&mut body_str, formatter)?; - // } - // WorkflowItem::Output(o) => { - // o.format(&mut output_section_str, formatter)?; - // } - // WorkflowItem::Hints(h) => { - // h.format(&mut hints_section_str, formatter)?; - // } - // } - // } - - // let mut first_section = true; - // if !meta_section_str.is_empty() { - // first_section = false; - // write!(writer, "{}", meta_section_str)?; - // } - // if !parameter_meta_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", parameter_meta_section_str)?; - // } - // if !input_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", input_section_str)?; - // } - // if !body_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", body_str)?; - // } - // if !output_section_str.is_empty() { - // if first_section { - // first_section = false; - // } else { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", output_section_str)?; - // } - // if !hints_section_str.is_empty() { - // if !first_section { - // write!(writer, "{}", NEWLINE)?; - // } - // write!(writer, "{}", hints_section_str)?; - // } - - // formatter.decrement_indent(); - - // let close_brace = first_child_of_kind(self.syntax(), SyntaxKind::CloseBrace); - // format_preceding_comments(&close_brace, writer, formatter, false)?; - // formatter.indent(writer)?; - // write!(writer, "{}", close_brace)?; - // format_inline_comment( - // &SyntaxElement::from(self.syntax().clone()), - // writer, - // formatter, - // false, - // ) - Ok(()) - } -} diff --git a/backup/wdl-format-old/tests/format.rs b/backup/wdl-format-old/tests/format.rs deleted file mode 100644 index d7d6dc5d8..000000000 --- a/backup/wdl-format-old/tests/format.rs +++ /dev/null @@ -1,192 +0,0 @@ -//! The format file tests. -//! -//! This test looks for directories in `tests/format`. -//! -//! Each directory is expected to contain: -//! -//! * `source.wdl` - the test input source to parse. -//! * `source.formatted` - the expected formatted output. -//! -//! 
The `source.formatted.wdl` file may be automatically generated or updated by
-//! setting the `BLESS` environment variable when running this test.
-
-use std::collections::HashSet;
-use std::env;
-use std::ffi::OsStr;
-use std::fs;
-use std::path::Path;
-use std::path::PathBuf;
-use std::process::exit;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
-
-use codespan_reporting::files::SimpleFile;
-use codespan_reporting::term;
-use codespan_reporting::term::termcolor::Buffer;
-use codespan_reporting::term::Config;
-use colored::Colorize;
-use pretty_assertions::StrComparison;
-use rayon::prelude::*;
-use wdl_ast::Diagnostic;
-use wdl_format::format_document;
-
-fn find_tests() -> Vec<PathBuf> {
-    // Check for filter arguments consisting of test names
-    let mut filter = HashSet::new();
-    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
-        if !arg.starts_with('-') {
-            filter.insert(arg);
-        }
-    }
-
-    let mut tests: Vec<PathBuf> = Vec::new();
-    for entry in Path::new("tests/format").read_dir().unwrap() {
-        let entry = entry.expect("failed to read directory");
-        let path = entry.path();
-        if !path.is_dir()
-            || (!filter.is_empty()
-                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
-        {
-            continue;
-        }
-
-        tests.push(path);
-    }
-
-    tests.sort();
-    tests
-}
-
-fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
-    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
-    let mut buffer = Buffer::no_color();
-    for diagnostic in diagnostics {
-        term::emit(
-            &mut buffer,
-            &Config::default(),
-            &file,
-            &diagnostic.to_codespan(),
-        )
-        .expect("should emit");
-    }
-
-    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
-}
-
-fn compare_result(path: &Path, result: &str) -> Result<(), String> {
-    if env::var_os("BLESS").is_some() {
-        fs::write(path, &result).map_err(|e| {
-            format!(
-                "failed to write result file `{path}`: {e}",
-                path = path.display()
-            )
-        })?;
-        return Ok(());
-    }
-
-    let expected = fs::read_to_string(path)
-        .map_err(|e| {
-            format!(
-                "failed to read result file `{path}`: {e}",
-                path = path.display()
-            )
-        })?
-        .replace("\r\n", "\n");
-
-    if expected != result {
-        return Err(format!(
-            "result is not as expected:\n{}",
-            StrComparison::new(&expected, &result),
-        ));
-    }
-
-    Ok(())
-}
-
-fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
-    let path = test.join("source.wdl");
-    let source = std::fs::read_to_string(&path).map_err(|e| {
-        format!(
-            "failed to read source file `{path}`: {e}",
-            path = path.display()
-        )
-    })?;
-
-    let formatted = format_document(&source).map_err(|e| {
-        format!(
-            "failed to format `{path}`: {e}",
-            path = path.display(),
-            e = format_diagnostics(&e, path.as_path(), &source)
-        )
-    })?;
-    compare_result(path.with_extension("formatted.wdl").as_path(), &formatted)?;
-
-    ntests.fetch_add(1, Ordering::SeqCst);
-    Ok(())
-}
-
-fn main() {
-    let tests = find_tests();
-    println!("\nrunning {} tests\n", tests.len());
-
-    let ntests = AtomicUsize::new(0);
-    let errors = tests
-        .par_iter()
-        .filter_map(|test| {
-            let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
-            match std::panic::catch_unwind(|| {
-                match run_test(test, &ntests)
-                    .map_err(|e| format!("failed to run test `{path}`: {e}", path = test.display()))
-                    .err()
-                {
-                    Some(e) => {
-                        println!("test {test_name} ... {failed}", failed = "failed".red());
-                        Some((test_name, e))
-                    }
-                    None => {
-                        println!("test {test_name} ... {ok}", ok = "ok".green());
-                        None
-                    }
-                }
-            }) {
-                Ok(result) => result,
-                Err(e) => {
-                    println!(
-                        "test {test_name} ... {panicked}",
-                        panicked = "panicked".red()
-                    );
-                    Some((
-                        test_name,
-                        format!(
-                            "test panicked: {e:?}",
-                            e = e
-                                .downcast_ref::<String>()
-                                .map(|s| s.as_str())
-                                .or_else(|| e.downcast_ref::<&str>().copied())
-                                .unwrap_or("no panic message")
-                        ),
-                    ))
-                }
-            }
-        })
-        .collect::<Vec<_>>();
-
-    if !errors.is_empty() {
-        eprintln!(
-            "\n{count} test(s) {failed}:",
-            count = errors.len(),
-            failed = "failed".red()
-        );
-
-        for (name, msg) in errors.iter() {
-            eprintln!("{name}: {msg}", msg = msg.red());
-        }
-
-        exit(1);
-    }
-
-    println!(
-        "\ntest result: ok. {} passed\n",
-        ntests.load(Ordering::SeqCst)
-    );
-}
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
deleted file mode 100644
index d9a98e06c..000000000
--- a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl
-on the date 08-05-2024.
-It was accompanied by the following license:
-
-MIT License
-
-Copyright (c) 2017 ENCODE DCC
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
deleted file mode 100644
index 7c8de0324..000000000
--- a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
+++ /dev/null
@@ -1 +0,0 @@
-version 1.0
diff --git a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
deleted file mode 100644
index 92c09ea84..000000000
--- a/backup/wdl-format-old/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
+++ /dev/null
@@ -1,3296 +0,0 @@
-version 1.0
-
-struct RuntimeEnvironment {
-    String docker
-    String singularity
-    String conda
-}
-
-workflow chip {
-    String pipeline_ver = 'v2.2.2'
-
-    meta {
-        version: 'v2.2.2'
-
-        author: 'Jin wook Lee'
-        email: 'leepc12@gmail.com'
-        description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil.'
-        organization: 'ENCODE DCC'
-
-        specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing'
-
-        default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2'
-        default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif'
-        croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json'
-
-        parameter_group: {
-            runtime_environment: {
-                title: 'Runtime environment',
-                description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.'
-            },
-            pipeline_metadata: {
-                title: 'Pipeline metadata',
-                description: 'Metadata for a pipeline (e.g. title and description).'
-            },
-            reference_genome: {
-                title: 'Reference genome',
-                description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.',
-                help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.'
-            },
-            input_genomic_data: {
-                title: 'Input genomic data',
-                description: 'Genomic input files for experiment.',
-                help: 'Pipeline can start with any type of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define a file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep2.bam"]. Define sequencing endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].'
-            },
-            input_genomic_data_control: {
-                title: 'Input genomic data (control)',
-                description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.',
-                help: 'Pipeline can start with any type of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define a file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl2.bam"]. Define sequencing endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.'
-            },
-            pipeline_parameter: {
-                title: 'Pipeline parameter',
-                description: 'Pipeline type and flags to turn on/off analyses.',
-                help: 'Use chip.align_only to align FASTQs without peak calling.'
-            },
-            alignment: {
-                title: 'Alignment',
-                description: 'Parameters for alignment.',
-                help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.'
-            },
-            peak_calling: {
-                title: 'Peak calling',
-                description: 'Parameters for peak calling.',
-                help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also includes parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compares read depth of controls. If control is too deep then it is subsampled.'
-            },
-            resource_parameter: {
-                title: 'Resource parameter',
-                description: 'Number of CPUs (threads), max. memory and walltime for tasks.',
-                help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to using 1 CPU and 4GB of memory.'
-            }
-        }
-    }
-    input {
-        # group: runtime_environment
-        String docker = 'encodedcc/chip-seq-pipeline:v2.2.2'
-        String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif'
-        String conda = 'encd-chip'
-        String conda_macs2 = 'encd-chip-macs2'
-        String conda_spp = 'encd-chip-spp'
-
-        # group: pipeline_metadata
-        String title = 'Untitled'
-        String description = 'No description'
-
-        # group: reference_genome
-        File? genome_tsv
-        String? genome_name
-        File? ref_fa
-        File? bwa_idx_tar
-        File? bowtie2_idx_tar
-        File? chrsz
-        File? blacklist
-        File? blacklist2
-        String? mito_chr_name
-        String? regex_bfilt_peak_chr_name
-        String? gensz
-        File? custom_aligner_idx_tar
-
-        # group: input_genomic_data
-        Boolean? paired_end
-        Array[Boolean] paired_ends = []
-        Array[File] fastqs_rep1_R1 = []
-        Array[File] fastqs_rep1_R2 = []
-        Array[File] fastqs_rep2_R1 = []
-        Array[File] fastqs_rep2_R2 = []
-        Array[File] fastqs_rep3_R1 = []
-        Array[File] fastqs_rep3_R2 = []
-        Array[File] fastqs_rep4_R1 = []
-        Array[File] fastqs_rep4_R2 = []
-        Array[File] fastqs_rep5_R1 = []
-        Array[File] fastqs_rep5_R2 = []
-        Array[File] fastqs_rep6_R1 = []
-        Array[File] fastqs_rep6_R2 = []
-        Array[File] fastqs_rep7_R1 = []
-        Array[File] fastqs_rep7_R2 = []
-        Array[File] fastqs_rep8_R1 = []
-        Array[File] fastqs_rep8_R2 = []
-        Array[File] fastqs_rep9_R1 = []
-        Array[File] fastqs_rep9_R2 = []
-        Array[File] fastqs_rep10_R1 = []
-        Array[File] fastqs_rep10_R2 = []
-        Array[File] bams = []
-        Array[File] nodup_bams = []
-        Array[File] tas = []
-        Array[File] peaks = []
-        Array[File] peaks_pr1 = []
-        Array[File] peaks_pr2 = []
-        File? peak_ppr1
-        File? peak_ppr2
-        File? peak_pooled
-
-        Boolean?
ctl_paired_end - Array[Boolean] ctl_paired_ends = [] - Array[File] ctl_fastqs_rep1_R1 = [] - Array[File] ctl_fastqs_rep1_R2 = [] - Array[File] ctl_fastqs_rep2_R1 = [] - Array[File] ctl_fastqs_rep2_R2 = [] - Array[File] ctl_fastqs_rep3_R1 = [] - Array[File] ctl_fastqs_rep3_R2 = [] - Array[File] ctl_fastqs_rep4_R1 = [] - Array[File] ctl_fastqs_rep4_R2 = [] - Array[File] ctl_fastqs_rep5_R1 = [] - Array[File] ctl_fastqs_rep5_R2 = [] - Array[File] ctl_fastqs_rep6_R1 = [] - Array[File] ctl_fastqs_rep6_R2 = [] - Array[File] ctl_fastqs_rep7_R1 = [] - Array[File] ctl_fastqs_rep7_R2 = [] - Array[File] ctl_fastqs_rep8_R1 = [] - Array[File] ctl_fastqs_rep8_R2 = [] - Array[File] ctl_fastqs_rep9_R1 = [] - Array[File] ctl_fastqs_rep9_R2 = [] - Array[File] ctl_fastqs_rep10_R1 = [] - Array[File] ctl_fastqs_rep10_R2 = [] - Array[File] ctl_bams = [] - Array[File] ctl_nodup_bams = [] - Array[File] ctl_tas = [] - - # group: pipeline_parameter - String pipeline_type - Boolean align_only = false - Boolean redact_nodup_bam = false - Boolean true_rep_only = false - Boolean enable_count_signal_track = false - Boolean enable_jsd = true - Boolean enable_gc_bias = true - - # group: alignment - String aligner = 'bowtie2' - File? custom_align_py - Boolean use_bwa_mem_for_pe = false - Int bwa_mem_read_len_limit = 70 - Boolean use_bowtie2_local_mode = false - Int crop_length = 0 - Int crop_length_tol = 2 - String trimmomatic_phred_score_format = 'auto' - Int xcor_trim_bp = 50 - Boolean use_filt_pe_ta_for_xcor = false - String dup_marker = 'picard' - Boolean no_dup_removal = false - Int mapq_thresh = 30 - Array[String] filter_chrs = [] - Int subsample_reads = 0 - Int ctl_subsample_reads = 0 - Int xcor_subsample_reads = 15000000 - Int xcor_exclusion_range_min = -500 - Int? xcor_exclusion_range_max - Int pseudoreplication_random_seed = 0 - - # group: peak_calling - Int ctl_depth_limit = 200000000 - Float exp_ctl_depth_ratio_limit = 5.0 - Array[Int?] fraglen = [] - String? peak_caller - Boolean always_use_pooled_ctl = true - Float ctl_depth_ratio = 1.2 - Int? cap_num_peak - Float pval_thresh = 0.01 - Float fdr_thresh = 0.01 - Float idr_thresh = 0.05 - - # group: resource_parameter - Int align_cpu = 6 - Float align_bowtie2_mem_factor = 0.15 - Float align_bwa_mem_factor = 1.0 - Int align_time_hr = 48 - Float align_bowtie2_disk_factor = 8.0 - Float align_bwa_disk_factor = 8.0 - - Int filter_cpu = 4 - Float filter_mem_factor = 0.4 - Int filter_time_hr = 24 - Float filter_disk_factor = 8.0 - - Int bam2ta_cpu = 2 - Float bam2ta_mem_factor = 0.35 - Int bam2ta_time_hr = 6 - Float bam2ta_disk_factor = 4.0 - - Float spr_mem_factor = 20.0 - Float spr_disk_factor = 30.0 - - Int jsd_cpu = 4 - Float jsd_mem_factor = 0.1 - Int jsd_time_hr = 6 - Float jsd_disk_factor = 2.0 - - Int xcor_cpu = 2 - Float xcor_mem_factor = 1.0 - Int xcor_time_hr = 24 - Float xcor_disk_factor = 4.5 - - Float subsample_ctl_mem_factor = 22.0 - Float subsample_ctl_disk_factor = 15.0 - - Float macs2_signal_track_mem_factor = 12.0 - Int macs2_signal_track_time_hr = 24 - Float macs2_signal_track_disk_factor = 80.0 - - Int call_peak_cpu = 6 - Float call_peak_spp_mem_factor = 5.0 - Float call_peak_macs2_mem_factor = 5.0 - Int call_peak_time_hr = 72 - Float call_peak_spp_disk_factor = 5.0 - Float call_peak_macs2_disk_factor = 30.0 - - String? align_trimmomatic_java_heap - String? filter_picard_java_heap - String? 
gc_bias_picard_java_heap
-    }
-
-    parameter_meta {
-        docker: {
-            description: 'Default Docker image URI to run WDL tasks.',
-            group: 'runtime_environment',
-            example: 'ubuntu:20.04'
-        }
-        singularity: {
-            description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.',
-            group: 'runtime_environment',
-            example: 'docker://ubuntu:20.04'
-        }
-        conda: {
-            description: 'Default Conda environment name to run WDL tasks. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip'
-        }
-        conda_macs2: {
-            description: 'Conda environment name for task macs2. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip-macs2'
-        }
-        conda_spp: {
-            description: 'Conda environment name for tasks spp/xcor. For Conda users only.',
-            group: 'runtime_environment',
-            example: 'encd-chip-spp'
-        }
-        title: {
-            description: 'Experiment title.',
-            group: 'pipeline_metadata',
-            example: 'ENCSR936XTK (subsampled 1/50)'
-        }
-        description: {
-            description: 'Experiment description.',
-            group: 'pipeline_metadata',
-            example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)'
-        }
-        genome_tsv: {
-            description: 'Reference genome database TSV.',
-            group: 'reference_genome',
-            help: 'This TSV file includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.',
-            example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv'
-        }
-        genome_name: {
-            description: 'Genome name.',
-            group: 'reference_genome'
-        }
-        ref_fa: {
-            description: 'Reference FASTA file.',
-            group: 'reference_genome'
-        }
-        bowtie2_idx_tar: {
-            description: 'Bowtie2 index TAR file.',
-            group: 'reference_genome'
-        }
-        custom_aligner_idx_tar: {
-            description: 'Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.',
-            group: 'reference_genome'
-        }
-        chrsz: {
-            description: '2-col chromosome sizes file.',
-            group: 'reference_genome'
-        }
-        blacklist: {
-            description: 'Blacklist file in BED format.',
-            group: 'reference_genome',
-            help: 'Peaks will be filtered with this file.'
-        }
-        blacklist2: {
-            description: 'Secondary blacklist file in BED format.',
-            group: 'reference_genome',
-            help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.'
-        }
-        mito_chr_name: {
-            description: 'Mitochondrial chromosome name.',
-            group: 'reference_genome',
-            help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.'
-        }
-        regex_bfilt_peak_chr_name: {
-            description: 'Reg-ex for chromosomes to keep while filtering peaks.',
-            group: 'reference_genome',
-            help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.'
-        }
-        gensz: {
-            description: 'Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd column in chromosome sizes file.',
-            group: 'reference_genome'
-        }
-        paired_end: {
-            description: 'Sequencing endedness.',
-            group: 'input_genomic_data',
-            help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.',
-            example: true
-        }
-        paired_ends: {
-            description: 'Sequencing endedness array (for mixed SE/PE datasets).',
-            group: 'input_genomic_data',
-            help: 'Whether each biological replicate is paired ended or not.'
-        }
-        fastqs_rep1_R1: {
-            description: 'Read1 FASTQs to be merged for a biological replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type and fill parameters for that type and leave the others undefined. Especially for FASTQs, we have an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep1_R2: {
-            description: 'Read2 FASTQs to be merged for a biological replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep2_R1: {
-            description: 'Read1 FASTQs to be merged for a biological replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep2_R2: {
-            description: 'Read2 FASTQs to be merged for a biological replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz'
-            ]
-        }
-        fastqs_rep3_R1: {
-            description: 'Read1 FASTQs to be merged for a biological replicate 3.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep3_R2: {
-            description: 'Read2 FASTQs to be merged for a biological replicate 3.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep4_R1: {
-            description: 'Read1 FASTQs to be merged for a biological replicate 4.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep4_R2: {
-            description: 'Read2 FASTQs to be merged for a biological replicate 4.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        fastqs_rep5_R1: {
-            description: 'Read1 FASTQs to be merged for a biological replicate 5.',
-            group: 'input_genomic_data',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
- } - fastqs_rep5_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 5.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep6_R1: { - description: 'Read1 FASTQs to be merged for a biological replicate 6.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep6_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 6.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep7_R1: { - description: 'Read1 FASTQs to be merged for a biological replicate 7.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep7_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 7.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep8_R1: { - description: 'Read1 FASTQs to be merged for a biological replicate 8.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep8_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 8.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep9_R1: { - description: 'Read1 FASTQs to be merged for a biological replicate 9.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep9_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 9.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep10_R1: { - description: 'Read1 FASTQs to be merged for a biological replicate 10.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' - } - fastqs_rep10_R2: { - description: 'Read2 FASTQs to be merged for a biological replicate 10.', - group: 'input_genomic_data', - help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' - } - bams: { - description: 'List of unfiltered/raw BAM files for each biological replicate.', - group: 'input_genomic_data', - help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].' 
-        }
-        nodup_bams: {
-            description: 'List of filtered/deduped BAM files for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
-        }
-        tas: {
-            description: 'List of TAG-ALIGN files for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
-        }
-        peaks: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.'
-        }
-        peaks_pr1: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
-        }
-        peaks_pr2: {
-            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
-        }
-        peak_pooled: {
-            description: 'NARROWPEAK file for pooled true replicate.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
-        }
-        peak_ppr1: {
-            description: 'NARROWPEAK file for pooled pseudo replicate 1.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
-        }
-        peak_ppr2: {
-            description: 'NARROWPEAK file for pooled pseudo replicate 2.',
-            group: 'input_genomic_data',
-            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
-        }
-
-        ctl_paired_end: {
-            description: 'Sequencing endedness for all controls.',
-            group: 'input_genomic_data_control',
-            help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.'
-        }
-        ctl_paired_ends: {
-            description: 'Sequencing endedness array for mixed SE/PE controls.',
-            group: 'input_genomic_data_control',
-            help: 'Whether each control replicate is paired ended or not.'
-        }
-        ctl_fastqs_rep1_R1: {
-            description: 'Read1 FASTQs to be merged for a control replicate 1.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type and fill parameters for that type and leave the others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep1_R2: {
-            description: 'Read2 FASTQs to be merged for a control replicate 1.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep2_R1: {
-            description: 'Read1 FASTQs to be merged for a control replicate 2.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep2_R2: {
-            description: 'Read2 FASTQs to be merged for a control replicate 2.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
-            example: [
-                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz'
-            ]
-        }
-        ctl_fastqs_rep3_R1: {
-            description: 'Read1 FASTQs to be merged for a control replicate 3.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep3_R2: {
-            description: 'Read2 FASTQs to be merged for a control replicate 3.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep4_R1: {
-            description: 'Read1 FASTQs to be merged for a control replicate 4.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep4_R2: {
-            description: 'Read2 FASTQs to be merged for a control replicate 4.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
-        }
-        ctl_fastqs_rep5_R1: {
-            description: 'Read1 FASTQs to be merged for a control replicate 5.',
-            group: 'input_genomic_data_control',
-            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2).
These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep5_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 5.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep6_R1: { - description: 'Read1 FASTQs to be merged for a control replicate 6.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep6_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 6.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep7_R1: { - description: 'Read1 FASTQs to be merged for a control replicate 7.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep7_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 7.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep8_R1: { - description: 'Read1 FASTQs to be merged for a control replicate 8.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep8_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 8.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep9_R1: { - description: 'Read1 FASTQs to be merged for a control replicate 9.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep9_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 9.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep10_R1: { - description: 'Read1 FASTQs to be merged for a control replicate 10.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' - } - ctl_fastqs_rep10_R2: { - description: 'Read2 FASTQs to be merged for a control replicate 10.', - group: 'input_genomic_data_control', - help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' - } - ctl_bams: { - description: 'List of unfiltered/raw BAM files for each control replicate.', - group: 'input_genomic_data_control', - help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. 
[ctl1.bam, ctl2.bam, ctl3.bam, ...].'
-        }
-        ctl_nodup_bams: {
-            description: 'List of filtered/deduped BAM files for each control replicate.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].'
-        }
-        ctl_tas: {
-            description: 'List of TAG-ALIGN files for each control replicate.',
-            group: 'input_genomic_data_control',
-            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].'
-        }
-
-        pipeline_type: {
-            description: 'Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.',
-            group: 'pipeline_parameter',
-            help: 'Default peak caller is different for each type. spp for TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.',
-            choices: ['tf', 'histone', 'control'],
-            example: 'tf'
-        }
-        redact_nodup_bam: {
-            description: 'Redact filtered/nodup BAM.',
-            group: 'pipeline_parameter',
-            help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.'
-        }
-        align_only: {
-            description: 'Align only mode.',
-            group: 'pipeline_parameter',
-            help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.'
-        }
-        true_rep_only: {
-            description: 'Disables all analyses related to pseudo-replicates.',
-            group: 'pipeline_parameter',
-            help: 'Pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
-        }
-        enable_count_signal_track: {
-            description: 'Enables generation of count signal tracks.',
-            group: 'pipeline_parameter'
-        }
-        enable_jsd: {
-            description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
-            group: 'pipeline_parameter'
-        }
-        enable_gc_bias: {
-            description: 'Enables GC bias calculation.',
-            group: 'pipeline_parameter'
-        }
-
-        aligner: {
-            description: 'Aligner. bowtie2, bwa or custom',
-            group: 'alignment',
-            help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
-            choices: ['bowtie2', 'bwa', 'custom'],
-            example: 'bowtie2'
-        }
-        custom_align_py: {
-            description: 'Python script for a custom aligner.',
-            group: 'alignment',
-            help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
-        }
-        use_bwa_mem_for_pe: {
-            description: 'For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
-            group: 'alignment',
-            help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
-        }
-        bwa_mem_read_len_limit: {
-            description: 'Read length limit for bwa mem (for PE FASTQs only).',
-            group: 'alignment',
-            help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
-        }
-        use_bowtie2_local_mode: {
-            description: 'Use bowtie2\'s local mode (soft-clipping).',
-            group: 'alignment',
-            help: 'This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.'
-        }
-        crop_length: {
-            description: 'Crop FASTQs\' reads longer than this length.',
-            group: 'alignment',
-            help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.'
-        }
-        crop_length_tol: {
-            description: 'Tolerance for cropping reads in FASTQs.',
-            group: 'alignment',
-            help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.'
-        }
-        trimmomatic_phred_score_format: {
-            description: 'Base encoding (format) for Phred score in FASTQs.',
-            group: 'alignment',
-            choices: ['auto', 'phred33', 'phred64'],
-            help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".'
-        }
-        xcor_trim_bp: {
-            description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'This does not affect alignment of experimental/control replicates. Pipeline additionally aligns R1 FASTQ for cross-correlation analysis only. This parameter is used for it.'
-        }
-        use_filt_pe_ta_for_xcor: {
-            description: 'Use filtered PE BAM for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.'
-        }
-        dup_marker: {
-            description: 'Marker for duplicate reads. picard or sambamba.',
-            group: 'alignment',
-            help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.',
-            choices: ['picard', 'sambamba'],
-            example: 'picard'
-        }
-        no_dup_removal: {
-            description: 'Disable removal of duplicate reads during filtering BAM.',
-            group: 'alignment',
-            help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.'
-        }
-        mapq_thresh: {
-            description: 'Threshold for low MAPQ reads removal.',
-            group: 'alignment',
-            help: 'Low MAPQ reads are filtered out while filtering BAM.'
-        }
-        filter_chrs: {
-            description: 'List of chromosomes to be filtered out while filtering BAM.',
-            group: 'alignment',
-            help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact word for chromosome names.'
-        }
-        subsample_reads: {
-            description: 'Subsample reads. Shuffle and subsample reads.',
-            group: 'alignment',
-            help: 'This affects all downstream analyses after filtering experiment BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
-        }
-        ctl_subsample_reads: {
-            description: 'Subsample control reads. Shuffle and subsample control reads.',
-            group: 'alignment',
-            help: 'This affects all downstream analyses after filtering control BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
-        }
-        xcor_subsample_reads: {
-            description: 'Subsample reads for cross-correlation analysis only.',
-            group: 'alignment',
-            help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
-        }
-        xcor_exclusion_range_min: {
-            description: 'Exclusion minimum for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.'
-        }
-        xcor_exclusion_range_max: {
-            description: 'Exclusion maximum for cross-correlation analysis.',
-            group: 'alignment',
-            help: 'For run_spp.R -s. If not defined, default values of `max(read length + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.'
-        }
-        pseudoreplication_random_seed: {
-            description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then split it into two).',
-            group: 'alignment',
-            help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
-        }
-        ctl_depth_limit: {
-            description: 'Hard limit for chosen control\'s depth.',
-            group: 'peak_calling',
-            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.'
-        }
-        exp_ctl_depth_ratio_limit: {
-            description: 'Second limit for chosen control\'s depth.',
-            group: 'peak_calling',
-            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate\'s read depth multiplied by this factor then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.'
-        }
-        fraglen: {
-            description: 'Fragment length for each biological replicate.',
-            group: 'peak_calling',
-            help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.'
-        }
-        peak_caller: {
-            description: 'Peak caller.',
-            group: 'peak_calling',
-            help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-Seq even though spp is by default for TF ChIP-Seq (chip.pipeline_type == tf).',
-            example: 'spp'
-        }
-        always_use_pooled_ctl: {
-            description: 'Always choose a pooled control for each experiment replicate.',
-            group: 'peak_calling',
-            help: 'If turned on, ignores chip.ctl_depth_ratio.'
-        }
-        ctl_depth_ratio: {
-            description: 'Maximum depth ratio between control replicates.',
-            group: 'peak_calling',
-            help: 'If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.'
-        }
-
-        cap_num_peak: {
-            description: 'Upper limit on the number of peaks.',
-            group: 'peak_calling',
-            help: 'It is 300000 and 500000 by default for spp and macs2, respectively.'
- } - pval_thresh: { - description: 'p-value Threshold for MACS2 peak caller.', - group: 'peak_calling', - help: 'macs2 callpeak -p' - } - fdr_thresh: { - description: 'FDR threshold for spp peak caller (phantompeakqualtools).', - group: 'peak_calling', - help: 'run_spp.R -fdr=' - } - idr_thresh: { - description: 'IDR threshold.', - group: 'peak_calling' - } - - align_cpu: { - description: 'Number of cores for task align.', - group: 'resource_parameter', - help: 'Task align merges/crops/maps FASTQs.' - } - align_bowtie2_mem_factor: { - description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - align_bwa_mem_factor: { - description: 'Multiplication factor to determine memory required for task align with bwa as aligner.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - align_time_hr: { - description: 'Walltime (h) required for task align.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - align_bowtie2_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.' - } - align_bwa_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.' - } - filter_cpu: { - description: 'Number of cores for task filter.', - group: 'resource_parameter', - help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.' - } - filter_mem_factor: { - description: 'Multiplication factor to determine memory required for task filter.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - filter_time_hr: { - description: 'Walltime (h) required for task filter.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - filter_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task filter.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.' - } - bam2ta_cpu: { - description: 'Number of cores for task bam2ta.', - group: 'resource_parameter', - help: 'Task bam2ta converts filtered/deduped BAM in to TAG-ALIGN (6-col BED) format.' - } - bam2ta_mem_factor: { - description: 'Multiplication factor to determine memory required for task bam2ta.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - bam2ta_time_hr: { - description: 'Walltime (h) required for task bam2ta.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' 
- } - bam2ta_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task bam2ta.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.' - } - spr_mem_factor: { - description: 'Multiplication factor to determine memory required for task spr.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - spr_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task spr.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.' - } - jsd_cpu: { - description: 'Number of cores for task jsd.', - group: 'resource_parameter', - help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.' - } - jsd_mem_factor: { - description: 'Multiplication factor to determine memory required for task jsd.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - jsd_time_hr: { - description: 'Walltime (h) required for task jsd.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - jsd_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task jsd.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.' - } - xcor_cpu: { - description: 'Number of cores for task xcor.', - group: 'resource_parameter', - help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.' - } - xcor_mem_factor: { - description: 'Multiplication factor to determine memory required for task xcor.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - xcor_time_hr: { - description: 'Walltime (h) required for task xcor.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - xcor_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task xcor.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.' - } - subsample_ctl_mem_factor: { - description: 'Multiplication factor to determine memory required for task subsample_ctl.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - subsample_ctl_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.' - } - call_peak_cpu: { - description: 'Number of cores for task call_peak. IF MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.', - group: 'resource_parameter', - help: 'Task call_peak call peaks on TAG-ALIGNs by using SPP/MACS2 peak caller. MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.' 
- } - call_peak_spp_mem_factor: { - description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - call_peak_macs2_mem_factor: { - description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - call_peak_time_hr: { - description: 'Walltime (h) required for task call_peak.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - call_peak_spp_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.' - } - call_peak_macs2_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.' - } - macs2_signal_track_mem_factor: { - description: 'Multiplication factor to determine memory required for task macs2_signal_track.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).' - } - macs2_signal_track_time_hr: { - description: 'Walltime (h) required for task macs2_signal_track.', - group: 'resource_parameter', - help: 'This is for HPCs only. e.g. SLURM, SGE, ...' - } - macs2_signal_track_disk_factor: { - description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.', - group: 'resource_parameter', - help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.' - } - align_trimmomatic_java_heap: { - description: 'Maximum Java heap (java -Xmx) in task align.', - group: 'resource_parameter', - help: 'Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.' - } - filter_picard_java_heap: { - description: 'Maximum Java heap (java -Xmx) in task filter.', - group: 'resource_parameter', - help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.' - } - gc_bias_picard_java_heap: { - description: 'Maximum Java heap (java -Xmx) in task gc_bias.', - group: 'resource_parameter', - help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.' 
- } - } - RuntimeEnvironment runtime_environment = { - 'docker': docker, 'singularity': singularity, 'conda': conda - } - RuntimeEnvironment runtime_environment_spp = { - 'docker': docker, 'singularity': singularity, 'conda': conda_spp - } - RuntimeEnvironment runtime_environment_macs2 = { - 'docker': docker, 'singularity': singularity, 'conda': conda_macs2 - } - - # read genome data and paths - if ( defined(genome_tsv) ) { - call read_genome_tsv { input: - genome_tsv = genome_tsv, - runtime_environment = runtime_environment - } - } - File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa]) - File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar - else read_genome_tsv.bwa_idx_tar - File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar]) - File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz]) - String gensz_ = select_first([gensz, read_genome_tsv.gensz]) - File? blacklist1_ = if defined(blacklist) then blacklist - else read_genome_tsv.blacklist - File? blacklist2_ = if defined(blacklist2) then blacklist2 - else read_genome_tsv.blacklist2 - # merge multiple blacklists - # two blacklists can have different number of columns (3 vs 6) - # so we limit merged blacklist's columns to 3 - Array[File] blacklists = select_all([blacklist1_, blacklist2_]) - if ( length(blacklists) > 1 ) { - call pool_ta as pool_blacklist { input: - tas = blacklists, - col = 3, - runtime_environment = runtime_environment - } - } - File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled - else if length(blacklists) > 0 then blacklists[0] - else blacklist2_ - String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name]) - String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name]) - String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)]) - - ### temp vars (do not define these) - String aligner_ = if defined(custom_align_py) then 'custom' else aligner - String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp']) - else select_first([peak_caller, 'macs2']) - String peak_type_ = if peak_caller_=='spp' then 'regionPeak' - else 'narrowPeak' - Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only - String idr_rank_ = if peak_caller_=='spp' then 'signal.value' - else if peak_caller_=='macs2' then 'p.value' - else 'p.value' - Int cap_num_peak_spp = 300000 - Int cap_num_peak_macs2 = 500000 - Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp]) - else select_first([cap_num_peak, cap_num_peak_macs2]) - Int mapq_thresh_ = mapq_thresh - Boolean enable_xcor_ = if pipeline_type=='control' then false else true - Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track - Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd - Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias - Boolean align_only_ = if pipeline_type=='control' then true else align_only - - Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor - else align_bwa_mem_factor - Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor - else align_bwa_disk_factor - Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor - else call_peak_macs2_mem_factor - Float call_peak_disk_factor_ = if peak_caller_ =='spp' then 
call_peak_spp_disk_factor - else call_peak_macs2_disk_factor - - # temporary 2-dim fastqs array [rep_id][merge_id] - Array[Array[File]] fastqs_R1 = - if length(fastqs_rep10_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, - fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1] - else if length(fastqs_rep9_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, - fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1] - else if length(fastqs_rep8_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, - fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1] - else if length(fastqs_rep7_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, - fastqs_rep6_R1, fastqs_rep7_R1] - else if length(fastqs_rep6_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, - fastqs_rep6_R1] - else if length(fastqs_rep5_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1] - else if length(fastqs_rep4_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1] - else if length(fastqs_rep3_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1] - else if length(fastqs_rep2_R1)>0 then - [fastqs_rep1_R1, fastqs_rep2_R1] - else if length(fastqs_rep1_R1)>0 then - [fastqs_rep1_R1] - else [] - # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) - Array[Array[File]] fastqs_R2 = - [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2, - fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2] - - # temporary 2-dim ctl fastqs array [rep_id][merge_id] - Array[Array[File]] ctl_fastqs_R1 = - if length(ctl_fastqs_rep10_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1] - else if length(ctl_fastqs_rep9_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1] - else if length(ctl_fastqs_rep8_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1] - else if length(ctl_fastqs_rep7_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1] - else if length(ctl_fastqs_rep6_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1] - else if length(ctl_fastqs_rep5_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1] - else if length(ctl_fastqs_rep4_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1] - else if length(ctl_fastqs_rep3_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1] - else if length(ctl_fastqs_rep2_R1)>0 then - [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1] - else if length(ctl_fastqs_rep1_R1)>0 then - [ctl_fastqs_rep1_R1] - else [] - # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) - 
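An aside on the cascade just above: WDL 1.0 has no ragged two-dimensional inputs, so the pipeline enumerates fastqs_rep1_R1 through fastqs_rep10_R1 and lets the highest non-empty replicate decide the outer array's length. A minimal sketch of the same idiom, trimmed to three hypothetical replicates (illustrative only, not part of this file):

    version 1.0

    # `rep1`..`rep3` are hypothetical stand-ins for the fastqs_repN_R1 inputs above.
    workflow group_replicates {
        input {
            Array[File] rep1 = []
            Array[File] rep2 = []
            Array[File] rep3 = []
        }
        # the highest non-empty replicate determines the outer length,
        # exactly as the rep10..rep1 cascade does above
        Array[Array[File]] grouped =
            if length(rep3) > 0 then [rep1, rep2, rep3]
            else if length(rep2) > 0 then [rep1, rep2]
            else if length(rep1) > 0 then [rep1]
            else []
        output {
            Int num_rep = length(grouped)
        }
    }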
Array[Array[File]] ctl_fastqs_R2 = - [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2, - ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2] - - # temporary variables to get number of replicates - # WDLic implementation of max(A,B,C,...) - Int num_rep_fastq = length(fastqs_R1) - Int num_rep_bam = if length(bams)<num_rep_fastq then num_rep_fastq - else length(bams) - Int num_rep_nodup_bam = if length(nodup_bams)<num_rep_bam then num_rep_bam - else length(nodup_bams) - Int num_rep_ta = if length(tas)<num_rep_nodup_bam then num_rep_nodup_bam - else length(tas) - Int num_rep_peak = if length(peaks)<num_rep_ta then num_rep_ta - else length(peaks) - Int num_rep = num_rep_peak - - Int num_ctl_fastq = length(ctl_fastqs_R1) - Int num_ctl_bam = if length(ctl_bams)<num_ctl_fastq then num_ctl_fastq - else length(ctl_bams) - Int num_ctl_nodup_bam = if length(ctl_nodup_bams)<num_ctl_bam then num_ctl_bam - else length(ctl_nodup_bams) - Int num_ctl_ta = if length(ctl_tas)<num_ctl_nodup_bam then num_ctl_nodup_bam - else length(ctl_tas) - Int num_ctl = num_ctl_ta - - # sanity check for inputs - if ( num_rep == 0 && num_ctl == 0 ) { - call raise_exception as error_input_data { input: - msg = 'No FASTQ/BAM/TAG-ALIGN/PEAK defined in your input JSON. Check if your FASTQs are defined as "chip.fastqs_repX_RY". DO NOT MISS suffix _R1 even for single ended FASTQ.', - runtime_environment = runtime_environment - } - } - if ( !align_only_ && peak_caller_ == 'spp' && num_ctl == 0 ) { - call raise_exception as error_control_required { input: - msg = 'SPP requires control inputs. Define control input files ("chip.ctl_*") in an input JSON file.', - runtime_environment = runtime_environment - } - } - if ( (num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) { - call raise_exception as error_wrong_aligner { input: - msg = 'Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.', - runtime_environment = runtime_environment - } - } - if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) { - call raise_exception as error_use_bwa_mem_for_non_bwa { input: - msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.', - runtime_environment = runtime_environment - } - } - if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) { - call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input: - msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.', - runtime_environment = runtime_environment - } - } - if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) { - call raise_exception as error_custom_aligner { input: - msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.', - runtime_environment = runtime_environment - } - } - - if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) { - call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input: - msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_limit">0) for ' + - 'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' + - 'Automatic control subsampling is enabled by default. ' + - 'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' + - 'You can still use manual control subsamping ("chip.ctl_subsample_reads">0) since it is done ' + - 'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ', - runtime_environment = runtime_environment - } - } - if ( pipeline_type == 'control' && num_ctl > 0 ) { - call raise_exception as error_ctl_input_defined_in_control_mode { input: - msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.', - runtime_environment = runtime_environment - } - } - if ( pipeline_type == 'control' && num_rep_fastq == 0 ) { - call raise_exception as error_ctl_fastq_input_required_for_control_mode { input: - msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. Pipeline will recognize them as control FASTQs.', - runtime_environment = runtime_environment - } - }
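The Int num_rep_* chain above is the "WDLic implementation of max" its comment advertises: each declaration keeps the larger of the running count and the next input type's count, so num_rep ends up as the maximum over FASTQ, BAM, filtered-BAM, TAG-ALIGN and peak inputs. Reduced to two input types with hypothetical names:

    version 1.0

    # Sketch of the running-maximum idiom; `bams` and `tas` are hypothetical.
    workflow running_max {
        input {
            Array[File] bams = []
            Array[File] tas = []
        }
        Int n_bam = length(bams)
        # keep the larger of the two counts, as each num_rep_* step does above;
        # e.g. bams = [b1, b2] and tas = [t1, t2, t3] gives num_rep == 3
        Int num_rep = if length(tas) < n_bam then n_bam else length(tas)
        output {
            Int n = num_rep
        }
    }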
# align each replicate - scatter(i in range(num_rep)) { - # to override endedness definition for individual replicate - # paired_end will override paired_ends[i] - Boolean paired_end_ = if !defined(paired_end) && i<length(paired_ends) then paired_ends[i] - else select_first([paired_end]) - - Boolean has_input_of_align = i<length(fastqs_R1) && length(fastqs_R1[i])>0 - Boolean has_output_of_align = i<length(bams) - } - - # align each control - scatter(i in range(num_ctl)) { - Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i<length(ctl_paired_ends) then ctl_paired_ends[i] - else select_first([ctl_paired_end, paired_end]) - - Boolean has_input_of_align_ctl = i<length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i])>0 - Boolean has_output_of_align_ctl = i<length(ctl_bams) - } - - # if there are TAs for ALL replicates then pool them - Boolean has_all_inputs_of_pool_ta = length(select_all(ta_))==num_rep - if ( has_all_inputs_of_pool_ta && num_rep>1 ) { - # pool tagaligns from true replicates - call pool_ta { input : - tas = ta_, - prefix = 'rep', - runtime_environment = runtime_environment - } - } - - # if there are pr1 TAs for ALL replicates then pool them - Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep - if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) { - # pool tagaligns from pseudo replicate 1 - call pool_ta as pool_ta_pr1 { input : - tas = spr.ta_pr1, - prefix = 'rep-pr1', - runtime_environment = runtime_environment - } - } - - # if there are pr2 TAs for ALL replicates then pool them - Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep - if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) { - # pool tagaligns from pseudo replicate 2 - call pool_ta as pool_ta_pr2 { input : - tas = spr.ta_pr2, - prefix = 'rep-pr2', - runtime_environment = runtime_environment - } - } - - # if there are CTL TAs for ALL replicates then pool them - Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl - if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) { - # pool tagaligns from true replicates - call pool_ta as pool_ta_ctl { input : - tas = ctl_ta_, - prefix = 'ctl', - runtime_environment = runtime_environment - } - } - - Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled) - if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) { - call count_signal_track as count_signal_track_pooled { input : - ta = pool_ta.ta_pooled, - chrsz = chrsz_, - runtime_environment = runtime_environment - } - } - - Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep - if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) { - # fingerprint and JS-distance plot - call jsd { input : - nodup_bams = nodup_bam_, - ctl_bams = ctl_nodup_bam_, # use first control only - blacklist = blacklist_, - mapq_thresh = mapq_thresh_, - - cpu = jsd_cpu, - mem_factor = jsd_mem_factor, - time_hr = jsd_time_hr, - disk_factor = jsd_disk_factor, - runtime_environment = runtime_environment - } - } - - Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep - && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0 - if ( has_all_input_of_choose_ctl && !align_only_ ) { - # choose appropriate control for each exp IP replicate - # outputs: - # choose_ctl.idx : control replicate index for each exp replicate - # -1 means pooled ctl replicate - call choose_ctl { input: - tas = ta_, - ctl_tas = ctl_ta_, - ta_pooled = pool_ta.ta_pooled, - ctl_ta_pooled = pool_ta_ctl.ta_pooled, - always_use_pooled_ctl = always_use_pooled_ctl, - ctl_depth_ratio = ctl_depth_ratio, - ctl_depth_limit = ctl_depth_limit, - exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit, - runtime_environment = runtime_environment - } - }
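choose_ctl returns one index per IP replicate, with sentinels that the scatter below decodes: an index >= 0 selects that control replicate, -1 the pooled control, and -2 no control at all. In isolation the decoding amounts to the following sketch (hypothetical names, not the pipeline's own code):

    version 1.0

    # Hypothetical decoder for choose_ctl-style indices.
    workflow decode_chosen_ctl {
        input {
            Array[File] ctl_tas   # per-replicate control TAG-ALIGNs
            File ctl_ta_pooled    # pooled control TAG-ALIGN
            Int chosen_id         # >=0 real index, -1 pooled, -2 none
        }
        Array[File] chosen =
            if chosen_id <= -2 then []
            else if chosen_id == -1 then [ctl_ta_pooled]
            else [ctl_tas[chosen_id]]
        output {
            Array[File] out = chosen
        }
    }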
scatter(i in range(num_rep)) { - # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]] - # chosen_ctl_ta_id - # >=0: control TA index (this means that control TA with this index exists) - # -1: use pooled control - # -2: there is no control - Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then - select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2 - Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then - select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0 - Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false - else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] - else ctl_paired_end_[chosen_ctl_ta_id] - - if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) { - call subsample_ctl { input: - ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled - else ctl_ta_[ chosen_ctl_ta_id ], - subsample = chosen_ctl_ta_subsample, - paired_end = chosen_ctl_paired_end, - mem_factor = subsample_ctl_mem_factor, - disk_factor = subsample_ctl_disk_factor, - runtime_environment = runtime_environment - } - } - Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] - else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ] - else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ] - else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ] - } - Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then - select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0 - - # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) - Array[Int] fraglen_tmp = select_all(fraglen_) - - # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks - scatter(i in range(num_rep)) { - Boolean has_input_of_call_peak = defined(ta_[i]) - Boolean has_output_of_call_peak = i<length(peaks) - if ( has_input_of_call_peak && !has_output_of_call_peak && !align_only_ ) { - # call peaks on tagalign - call call_peak { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([ta_[i]]), chosen_ctl_tas[i]]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_[i], - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? peak_ = if has_output_of_call_peak then peaks[i] - else call_peak.peak - } - - # if ( num_rep > 1 ) { - # rounded mean of fragment length, which will be used for - # 1) calling peaks for pooled true/pseudo replicates - # 2) calculating FRiP - call rounded_mean as fraglen_mean { input : - ints = fraglen_tmp, - runtime_environment = runtime_environment - } - # } - - if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) { - call subsample_ctl as subsample_ctl_pooled { input: - ta = if num_ctl < 2 then ctl_ta_[0] - else pool_ta_ctl.ta_pooled, - subsample = chosen_ctl_ta_pooled_subsample, - paired_end = ctl_paired_end_[0], - mem_factor = subsample_ctl_mem_factor, - disk_factor = subsample_ctl_disk_factor, - runtime_environment = runtime_environment - } - } - # actually not an array - Array[File?]
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] - else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ] - else if num_ctl < 2 then [ ctl_ta_[0] ] - else [ pool_ta_ctl.ta_pooled ] - - Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) - Boolean has_output_of_call_peak_pooled = defined(peak_pooled) - if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) { - # call peaks on pooled replicate - # always call peaks for pooled replicate to get signal tracks - call call_peak as call_peak_pooled { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled - else call_peak_pooled.peak - - # macs2 signal track for pooled rep - if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) { - call macs2_signal_track as macs2_signal_track_pooled { input : - tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - pval_thresh = pval_thresh, - fraglen = fraglen_mean.rounded_mean, - - mem_factor = macs2_signal_track_mem_factor, - disk_factor = macs2_signal_track_disk_factor, - time_hr = macs2_signal_track_time_hr, - runtime_environment = runtime_environment_macs2 - } - } - - Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) - Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) - if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) { - # call peaks on 1st pooled pseudo replicates - call call_peak as call_peak_ppr1 { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? 
peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 - else call_peak_ppr1.peak - - Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) - Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) - if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) { - # call peaks on 2nd pooled pseudo replicates - call call_peak as call_peak_ppr2 { input : - peak_caller = peak_caller_, - peak_type = peak_type_, - tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]), - gensz = gensz_, - chrsz = chrsz_, - cap_num_peak = cap_num_peak_, - pval_thresh = pval_thresh, - fdr_thresh = fdr_thresh, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - - cpu = call_peak_cpu, - mem_factor = call_peak_mem_factor_, - disk_factor = call_peak_disk_factor_, - time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp - else if peak_caller_ == 'macs2' then runtime_environment_macs2 - else runtime_environment - } - } - File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 - else call_peak_ppr2.peak - - # do IDR/overlap on all pairs of two replicates (i,j) - # where i and j are zero-based indices and 0 <= i < j < num_rep - scatter( pair in cross(range(num_rep),range(num_rep)) ) { - # pair.left = 0-based index of 1st replicate - # pair.right = 0-based index of 2nd replicate - File? peak1_ = peak_[pair.left] - File? peak2_ = peak_[pair.right] - if ( !align_only_ && pair.left<pair.right ) { - # Naive overlap on every pair of true replicates - call overlap { input : - prefix = 'rep'+(pair.left+1)+'_vs_rep'+(pair.right+1), - peak1 = peak1_, - peak2 = peak2_, - peak_pooled = peak_pooled_, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - if ( !align_only_ && pair.left<pair.right && enable_idr ) { - # IDR on every pair of true replicates - call idr { input : - prefix = 'rep'+(pair.left+1)+'_vs_rep'+(pair.right+1), - peak1 = peak1_, - peak2 = peak2_, - peak_pooled = peak_pooled_, - idr_thresh = idr_thresh, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - rank = idr_rank_, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - } - - if ( !align_only_ && !true_rep_only && num_rep > 1 ) { - # Naive overlap on pooled pseudo replicates - call overlap as overlap_ppr { input : - prefix = 'pooled-pr1_vs_pooled-pr2', - peak1 = peak_ppr1_, - peak2 = peak_ppr2_, - peak_pooled = peak_pooled_, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - - if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) { - # IDR on pooled pseduo replicates - call idr as idr_ppr { input : - prefix = 'pooled-pr1_vs_pooled-pr2', - peak1 = peak_ppr1_, - peak2 = peak_ppr2_, - peak_pooled = peak_pooled_, - idr_thresh = idr_thresh, - peak_type = peak_type_, - fraglen = fraglen_mean.rounded_mean, - rank = idr_rank_, - blacklist = blacklist_, - chrsz = chrsz_, - regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, - ta = pool_ta.ta_pooled, - runtime_environment = runtime_environment - } - } - - # reproducibility QC for overlap/IDR peaks - if ( !align_only_ && !true_rep_only && num_rep > 0 ) { - # reproducibility QC for overlapping peaks - call reproducibility as reproducibility_overlap { input : - prefix = 'overlap', - peaks = select_all(overlap.bfilt_overlap_peak), - peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [], - peak_ppr = overlap_ppr.bfilt_overlap_peak, - peak_type = peak_type_, - chrsz = chrsz_, - runtime_environment = runtime_environment - } - } - - if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) { - # reproducibility QC for IDR peaks - call reproducibility as reproducibility_idr { input : - prefix = 'idr', - peaks = select_all(idr.bfilt_idr_peak), - peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [], - peak_ppr = idr_ppr.bfilt_idr_peak, - peak_type = peak_type_, - chrsz = chrsz_, - runtime_environment = runtime_environment - } - }
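For the record, the pair scatter above leans on cross(): it enumerates all ordered index pairs and keeps only those with pair.left < pair.right, so each unordered replicate pair is visited exactly once (rep1_vs_rep2, rep1_vs_rep3, rep2_vs_rep3 for three replicates). A self-contained sketch of that pattern:

    version 1.0

    # Demonstrates the cross()-based pair enumeration used above; standalone.
    workflow unique_pairs {
        input {
            Int n = 3
        }
        scatter (pair in cross(range(n), range(n))) {
            # keep i < j only: (0,1), (0,2), (1,2) for n == 3
            if (pair.left < pair.right) {
                String label = "rep~{pair.left + 1}_vs_rep~{pair.right + 1}"
            }
        }
        output {
            Array[String] labels = select_all(label)
        }
    }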
- # Generate final QC report and JSON - call qc_report { input : - pipeline_ver = pipeline_ver, - title = title, - description = description, - genome = genome_name_, - paired_ends = paired_end_, - ctl_paired_ends = ctl_paired_end_, - pipeline_type = pipeline_type, - aligner = aligner_, - no_dup_removal = no_dup_removal, - peak_caller = peak_caller_, - cap_num_peak = cap_num_peak_, - idr_thresh = idr_thresh, - pval_thresh = pval_thresh, - xcor_trim_bp = xcor_trim_bp, - xcor_subsample_reads = xcor_subsample_reads, - - samstat_qcs = select_all(align.samstat_qc), - nodup_samstat_qcs = select_all(filter.samstat_qc), - dup_qcs = select_all(filter.dup_qc), - lib_complexity_qcs = select_all(filter.lib_complexity_qc), - xcor_plots = select_all(xcor.plot_png), - xcor_scores = select_all(xcor.score), - - ctl_samstat_qcs = select_all(align_ctl.samstat_qc), - ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc), - ctl_dup_qcs = select_all(filter_ctl.dup_qc), - ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), - - jsd_plot = jsd.plot, - jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [], - - frip_qcs = select_all(call_peak.frip_qc), - frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), - frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc), - frip_qc_pooled = call_peak_pooled.frip_qc, - frip_qc_ppr1 = call_peak_ppr1.frip_qc, - frip_qc_ppr2 = call_peak_ppr2.frip_qc, - - idr_plots = select_all(idr.idr_plot), - idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [], - idr_plot_ppr = idr_ppr.idr_plot, - frip_idr_qcs = select_all(idr.frip_qc), - frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [], - frip_idr_qc_ppr = idr_ppr.frip_qc, - frip_overlap_qcs = select_all(overlap.frip_qc), - frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [], - frip_overlap_qc_ppr = overlap_ppr.frip_qc, - idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, - overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, - - gc_plots = select_all(gc_bias.gc_plot), - - peak_region_size_qcs = select_all(call_peak.peak_region_size_qc), - peak_region_size_plots = select_all(call_peak.peak_region_size_plot), - num_peak_qcs = select_all(call_peak.num_peak_qc), - - idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc, - idr_opt_peak_region_size_plot = reproducibility_idr.peak_region_size_plot, - idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc, - - overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc, - overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot, - overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc, - - runtime_environment = runtime_environment - } - - output { - File report = qc_report.report - File qc_json = qc_report.qc_json - Boolean qc_json_ref_match = qc_report.qc_json_ref_match - } -}
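Before the task definitions, a note on the resource model they share: each task derives memory and disk from its input size via a base amount plus the workflow-supplied *_mem_factor/*_disk_factor multipliers, which is what the "Multiplication factor..." parameter_meta entries earlier describe. Worked through for align with assumed example numbers:

    # Sketch with assumed values; the factors are workflow inputs.
    #   input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G")    # say 4.0
    #   mem_gb  = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb
    #           = 5.0 + 3.5 + 0.15 * 4.0  = 9.1 GB    # assuming a 3.5 GB index tar
    #   samtools_mem_gb = 0.8 * mem_gb    = 7.28 GB
    #   disk_gb = round(40.0 + disk_factor * input_file_size_gb)
    #           = round(40.0 + 8.0 * 4.0) = 72 GB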
-task align { - input { - Array[File] fastqs_R1 # [merge_id] - Array[File] fastqs_R2 - File? ref_fa - Int? trim_bp # this is for R1 only - Int crop_length - Int crop_length_tol - String? trimmomatic_phred_score_format - - String aligner - - String mito_chr_name - Int? multimapping - File? custom_align_py - File? idx_tar # reference index tar - Boolean paired_end - Boolean use_bwa_mem_for_pe - Int bwa_mem_read_len_limit - Boolean use_bowtie2_local_mode - - String? trimmomatic_java_heap - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") - Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) - - Float trimmomatic_java_heap_factor = 0.9 - Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2]) - else transpose([fastqs_R1]) - command { - set -e - - # check if pipeline dependencies can be found - if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] - then - echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 - exit 3 - fi - python3 $(which encode_task_merge_fastq.py) \ - ${write_tsv(tmp_fastqs)} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--nth ' + cpu} - - if [ -z '${trim_bp}' ]; then - SUFFIX= - else - SUFFIX=_trimmed - python3 $(which encode_task_trim_fastq.py) \ - R1/*.fastq.gz \ - --trim-bp ${trim_bp} \ - --out-dir R1$SUFFIX - if [ '${paired_end}' == 'true' ]; then - python3 $(which encode_task_trim_fastq.py) \ - R2/*.fastq.gz \ - --trim-bp ${trim_bp} \ - --out-dir R2$SUFFIX - fi - fi - if [ '${crop_length}' == '0' ]; then - SUFFIX=$SUFFIX - else - NEW_SUFFIX="$SUFFIX"_cropped - python3 $(which encode_task_trimmomatic.py) \ - --fastq1 R1$SUFFIX/*.fastq.gz \ - ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - --crop-length ${crop_length} \ - --crop-length-tol "${crop_length_tol}" \ - ${'--phred-score-format ' + trimmomatic_phred_score_format } \ - --out-dir-R1 R1$NEW_SUFFIX \ - ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \ - ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \ - ${'--nth ' + cpu} - SUFFIX=$NEW_SUFFIX - fi - - if [ '${aligner}' == 'bwa' ]; then - python3 $(which encode_task_bwa.py) \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \ - ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - - elif [ '${aligner}' == 'bowtie2' ]; then - python3 $(which encode_task_bowtie2.py) \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${'--multimapping ' + multimapping} \ - ${if paired_end then '--paired-end' else ''} \ - ${if use_bowtie2_local_mode then '--local' else ''} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - else - python3 ${custom_align_py} \ - ${idx_tar} \ - R1$SUFFIX/*.fastq.gz \ - ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - fi - - python3 $(which encode_task_post_align.py) \ - R1$SUFFIX/*.fastq.gz $(ls *.bam) \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - rm -rf R1 R2 R1$SUFFIX R2$SUFFIX - } - output { - File bam = glob('*.bam')[0] - File bai = glob('*.bai')[0] - File samstat_qc = glob('*.samstats.qc')[0] - File read_len_log = glob('*.read_length.txt')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb}
SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task filter { - input { - File? bam - Boolean paired_end - File? ref_fa - Boolean redact_nodup_bam - String dup_marker # picard.jar MarkDuplicates (picard) or - # sambamba markdup (sambamba) - Int mapq_thresh # threshold for low MAPQ reads removal - Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM - File chrsz # 2-col chromosome sizes file - Boolean no_dup_removal # no dupe reads removal when filtering BAM - String mito_chr_name - - Int cpu - Float mem_factor - String? picard_java_heap - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(bam, "G") - Float picard_java_heap_factor = 0.9 - Float mem_gb = 6.0 + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_filter.py) \ - ${bam} \ - ${if paired_end then '--paired-end' else ''} \ - --multimapping 0 \ - ${'--dup-marker ' + dup_marker} \ - ${'--mapq-thresh ' + mapq_thresh} \ - --filter-chrs ${sep=' ' filter_chrs} \ - ${'--chrsz ' + chrsz} \ - ${if no_dup_removal then '--no-dup-removal' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} \ - ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} - - if [ '${redact_nodup_bam}' == 'true' ]; then - python3 $(which encode_task_bam_to_pbam.py) \ - $(ls *.bam) \ - ${'--ref-fa ' + ref_fa} \ - '--delete-original-bam' - fi - } - output { - File nodup_bam = glob('*.bam')[0] - File nodup_bai = glob('*.bai')[0] - File samstat_qc = glob('*.samstats.qc')[0] - File dup_qc = glob('*.dup.qc')[0] - File lib_complexity_qc = glob('*.lib_complexity.qc')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task bam2ta { - input { - File? bam - Boolean paired_end - String mito_chr_name # mito chromosome name - Int subsample # number of reads to subsample TAGALIGN - # this affects all downstream analysis - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(bam, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Float samtools_mem_gb = 0.8 * mem_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_bam2ta.py) \ - ${bam} \ - --disable-tn5-shift \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--subsample ' + subsample} \ - ${'--mem-gb ' + samtools_mem_gb} \ - ${'--nth ' + cpu} - } - output { - File ta = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task spr { - input { - File? 
ta - Boolean paired_end - Int pseudoreplication_random_seed - - Float mem_factor - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_spr.py) \ - ${ta} \ - ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \ - ${if paired_end then '--paired-end' else ''} - } - output { - File ta_pr1 = glob('*.pr1.tagAlign.gz')[0] - File ta_pr2 = glob('*.pr2.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task pool_ta { - input { - Array[File?] tas - Int? col # number of columns in pooled TA - String? prefix # basename prefix - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_pool_ta.py) \ - ${sep=' ' select_all(tas)} \ - ${'--prefix ' + prefix} \ - ${'--col ' + col} - } - output { - File ta_pooled = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '8 GB' - time : 4 - disks : 'local-disk 100 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task xcor { - input { - File? ta - Boolean paired_end - String mito_chr_name - Int subsample # number of reads to subsample TAGALIGN - # this will be used for xcor only - # will not affect any downstream analysis - String? chip_seq_type - Int? exclusion_range_min - Int? exclusion_range_max - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 8.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_xcor.py) \ - ${ta} \ - ${if paired_end then '--paired-end' else ''} \ - ${'--mito-chr-name ' + mito_chr_name} \ - ${'--subsample ' + subsample} \ - ${'--chip-seq-type ' + chip_seq_type} \ - ${'--exclusion-range-min ' + exclusion_range_min} \ - ${'--exclusion-range-max ' + exclusion_range_max} \ - ${'--subsample ' + subsample} \ - ${'--nth ' + cpu} - } - output { - File plot_pdf = glob('*.cc.plot.pdf')[0] - File plot_png = glob('*.cc.plot.png')[0] - File score = glob('*.cc.qc')[0] - File fraglen_log = glob('*.cc.fraglen.txt')[0] - Int fraglen = read_int(fraglen_log) - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task jsd { - input { - Array[File?] nodup_bams - Array[File?] ctl_bams - File? 
blacklist - Int mapq_thresh - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") - Float mem_gb = 5.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_jsd.py) \ - ${sep=' ' select_all(nodup_bams)} \ - ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ - ${'--mapq-thresh '+ mapq_thresh} \ - ${'--blacklist '+ blacklist} \ - ${'--nth ' + cpu} - } - output { - File plot = glob('*.png')[0] - Array[File] jsd_qcs = glob('*.jsd.qc') - } - runtime { - cpu : cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task choose_ctl { - input { - Array[File?] tas - Array[File?] ctl_tas - File? ta_pooled - File? ctl_ta_pooled - Boolean always_use_pooled_ctl # always use pooled control for all exp rep. - Float ctl_depth_ratio # if ratio between controls is higher than this - # then always use pooled control for all exp rep. - Int ctl_depth_limit - Float exp_ctl_depth_ratio_limit - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_choose_ctl.py) \ - --tas ${sep=' ' select_all(tas)} \ - --ctl-tas ${sep=' ' select_all(ctl_tas)} \ - ${'--ta-pooled ' + ta_pooled} \ - ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ - ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ - ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ - ${'--ctl-depth-limit ' + ctl_depth_limit} \ - ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} - } - output { - File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] - File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] - File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] - Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) - Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) - Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task count_signal_track { - input { - File? ta # tag-align - File chrsz # 2-col chromosome sizes file - - RuntimeEnvironment runtime_environment - } - Float mem_gb = 8.0 - - command { - set -e - python3 $(which encode_task_count_signal_track.py) \ - ${ta} \ - ${'--chrsz ' + chrsz} \ - ${'--mem-gb ' + mem_gb} - } - output { - File pos_bw = glob('*.positive.bigwig')[0] - File neg_bw = glob('*.negative.bigwig')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task subsample_ctl { - input { - File? 
ta - Boolean paired_end - Int subsample - - Float mem_factor - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(ta, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - python3 $(which encode_task_subsample_ctl.py) \ - ${ta} \ - ${'--subsample ' + subsample} \ - ${if paired_end then '--paired-end' else ''} \ - } - output { - File ta_subsampled = glob('*.tagAlign.gz')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 4 - disks : 'local-disk ${disk_gb} SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task call_peak { - input { - String peak_caller - String peak_type - Array[File?] tas # [ta, control_ta]. control_ta is optional - Int fraglen # fragment length from xcor - String gensz # Genome size (sum of entries in 2nd column of - # chr. sizes file, or hs for human, ms for mouse) - File chrsz # 2-col chromosome sizes file - Int cap_num_peak # cap number of raw peaks called from MACS2 - Float pval_thresh # p.value threshold for MACS2 - Float? fdr_thresh # FDR threshold for SPP - - File? blacklist # blacklist BED to filter raw peaks - String? regex_bfilt_peak_chr_name - - Int cpu - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(tas, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - - if [ '${peak_caller}' == 'macs2' ]; then - python3 $(which encode_task_macs2_chip.py) \ - ${sep=' ' select_all(tas)} \ - ${'--gensz '+ gensz} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--cap-num-peak ' + cap_num_peak} \ - ${'--pval-thresh '+ pval_thresh} \ - ${'--mem-gb ' + mem_gb} - - elif [ '${peak_caller}' == 'spp' ]; then - python3 $(which encode_task_spp.py) \ - ${sep=' ' select_all(tas)} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--cap-num-peak ' + cap_num_peak} \ - ${'--fdr-thresh '+ fdr_thresh} \ - ${'--nth ' + cpu} - fi - - python3 $(which encode_task_post_call_peak_chip.py) \ - $(ls *Peak.gz) \ - ${'--ta ' + tas[0]} \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--peak-type ' + peak_type} \ - ${'--blacklist ' + blacklist} - } - output { - File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - # generated by post_call_peak py - File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File frip_qc = glob('*.frip.qc')[0] - File peak_region_size_qc = glob('*.peak_region_size.qc')[0] - File peak_region_size_plot = glob('*.peak_region_size.png')[0] - File num_peak_qc = glob('*.num_peak.qc')[0] - } - runtime { - cpu : if peak_caller == 'macs2' then 2 else cpu - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task macs2_signal_track { - input { - Array[File?] tas # [ta, control_ta]. 
control_ta is optional - Int fraglen # fragment length from xcor - String gensz # Genome size (sum of entries in 2nd column of - # chr. sizes file, or hs for human, ms for mouse) - File chrsz # 2-col chromosome sizes file - Float pval_thresh # p.value threshold - - Float mem_factor - Int time_hr - Float disk_factor - - RuntimeEnvironment runtime_environment - } - Float input_file_size_gb = size(tas, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) - - command { - set -e - python3 $(which encode_task_macs2_signal_track_chip.py) \ - ${sep=' ' select_all(tas)} \ - ${'--gensz '+ gensz} \ - ${'--chrsz ' + chrsz} \ - ${'--fraglen ' + fraglen} \ - ${'--pval-thresh '+ pval_thresh} \ - ${'--mem-gb ' + mem_gb} - } - output { - File pval_bw = glob('*.pval.signal.bigwig')[0] - File fc_bw = glob('*.fc.signal.bigwig')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : time_hr - disks : 'local-disk ${disk_gb} SSD' - preemptible: 0 - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task idr { - input { - String prefix # prefix for IDR output file - File? peak1 - File? peak2 - File? peak_pooled - Float idr_thresh - File? blacklist # blacklist BED to filter raw peaks - String regex_bfilt_peak_chr_name - # parameters to compute FRiP - File? ta # to calculate FRiP - Int? fraglen # fragment length from xcor - File chrsz # 2-col chromosome sizes file - String peak_type - String rank - - RuntimeEnvironment runtime_environment - } - - command { - set -e - ${if defined(ta) then '' else 'touch null.frip.qc'} - touch null - python3 $(which encode_task_idr.py) \ - ${peak1} ${peak2} ${peak_pooled} \ - ${'--prefix ' + prefix} \ - ${'--idr-thresh ' + idr_thresh} \ - ${'--peak-type ' + peak_type} \ - --idr-rank ${rank} \ - ${'--fraglen ' + fraglen} \ - ${'--chrsz ' + chrsz} \ - ${'--blacklist '+ blacklist} \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--ta ' + ta} - } - output { - File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File idr_plot = glob('*.txt.png')[0] - File idr_unthresholded_peak = glob('*.txt.gz')[0] - File idr_log = glob('*.idr*.log')[0] - File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task overlap { - input { - String prefix # prefix for IDR output file - File? peak1 - File? peak2 - File? peak_pooled - File? blacklist # blacklist BED to filter raw peaks - String regex_bfilt_peak_chr_name - # parameters to compute FRiP - File? ta # to calculate FRiP - Int? 
fraglen # fragment length from xcor (for FRIP) - File chrsz # 2-col chromosome sizes file - String peak_type - - RuntimeEnvironment runtime_environment - } - - command { - set -e - ${if defined(ta) then '' else 'touch null.frip.qc'} - touch null - python3 $(which encode_task_overlap.py) \ - ${peak1} ${peak2} ${peak_pooled} \ - ${'--prefix ' + prefix} \ - ${'--peak-type ' + peak_type} \ - ${'--fraglen ' + fraglen} \ - ${'--chrsz ' + chrsz} \ - ${'--blacklist '+ blacklist} \ - --nonamecheck \ - ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ - ${'--ta ' + ta} - } - output { - File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] - File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] - File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] - File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] - File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] - File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] - File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task reproducibility { - input { - String prefix - Array[File] peaks # peak files from pair of true replicates - # in a sorted order. for example of 4 replicates, - # 1,2 1,3 1,4 2,3 2,4 3,4. - # x,y means peak file from rep-x vs rep-y - Array[File] peaks_pr # peak files from pseudo replicates - File? peak_ppr # Peak file from pooled pseudo replicate. - String peak_type - File chrsz # 2-col chromosome sizes file - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_reproducibility.py) \ - ${sep=' ' peaks} \ - --peaks-pr ${sep=' ' peaks_pr} \ - ${'--peak-ppr '+ peak_ppr} \ - --prefix ${prefix} \ - ${'--peak-type ' + peak_type} \ - ${'--chrsz ' + chrsz} - } - output { - File optimal_peak = glob('*optimal_peak.*.gz')[0] - File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] - File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] - File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] - File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] - File conservative_peak = glob('*conservative_peak.*.gz')[0] - File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] - File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] - File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] - File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] - File reproducibility_qc = glob('*reproducibility.qc')[0] - # QC metrics for optimal peak - File peak_region_size_qc = glob('*.peak_region_size.qc')[0] - File peak_region_size_plot = glob('*.peak_region_size.png')[0] - File num_peak_qc = glob('*.num_peak.qc')[0] - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task gc_bias { - input { - File? nodup_bam - File ref_fa - - String? 
picard_java_heap - - RuntimeEnvironment runtime_environment - } - Float mem_factor = 0.3 - Float input_file_size_gb = size(nodup_bam, "G") - Float mem_gb = 4.0 + mem_factor * input_file_size_gb - Float picard_java_heap_factor = 0.9 - - command { - set -e - python3 $(which encode_task_gc_bias.py) \ - ${'--nodup-bam ' + nodup_bam} \ - ${'--ref-fa ' + ref_fa} \ - ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} - } - output { - File gc_plot = glob('*.gc_plot.png')[0] - File gc_log = glob('*.gc.txt')[0] - } - runtime { - cpu : 1 - memory : '${mem_gb} GB' - time : 6 - disks : 'local-disk 250 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task qc_report { - input { - # optional metadata - String pipeline_ver - String title # name of sample - String description # description for sample - String? genome - #String? encode_accession_id # ENCODE accession ID of sample - # workflow params - Array[Boolean] paired_ends - Array[Boolean] ctl_paired_ends - String pipeline_type - String aligner - Boolean no_dup_removal - String peak_caller - Int cap_num_peak - Float idr_thresh - Float pval_thresh - Int xcor_trim_bp - Int xcor_subsample_reads - # QCs - Array[File] samstat_qcs - Array[File] nodup_samstat_qcs - Array[File] dup_qcs - Array[File] lib_complexity_qcs - Array[File] ctl_samstat_qcs - Array[File] ctl_nodup_samstat_qcs - Array[File] ctl_dup_qcs - Array[File] ctl_lib_complexity_qcs - Array[File] xcor_plots - Array[File] xcor_scores - File? jsd_plot - Array[File] jsd_qcs - Array[File] idr_plots - Array[File] idr_plots_pr - File? idr_plot_ppr - Array[File] frip_qcs - Array[File] frip_qcs_pr1 - Array[File] frip_qcs_pr2 - File? frip_qc_pooled - File? frip_qc_ppr1 - File? frip_qc_ppr2 - Array[File] frip_idr_qcs - Array[File] frip_idr_qcs_pr - File? frip_idr_qc_ppr - Array[File] frip_overlap_qcs - Array[File] frip_overlap_qcs_pr - File? frip_overlap_qc_ppr - File? idr_reproducibility_qc - File? overlap_reproducibility_qc - - Array[File] gc_plots - - Array[File] peak_region_size_qcs - Array[File] peak_region_size_plots - Array[File] num_peak_qcs - - File? idr_opt_peak_region_size_qc - File? idr_opt_peak_region_size_plot - File? idr_opt_num_peak_qc - - File? overlap_opt_peak_region_size_qc - File? overlap_opt_peak_region_size_plot - File? overlap_opt_num_peak_qc - - File? 
qc_json_ref - - RuntimeEnvironment runtime_environment - } - - command { - set -e - python3 $(which encode_task_qc_report.py) \ - --pipeline-prefix chip \ - ${'--pipeline-ver ' + pipeline_ver} \ - ${"--title '" + sub(title,"'","_") + "'"} \ - ${"--desc '" + sub(description,"'","_") + "'"} \ - ${'--genome ' + genome} \ - ${'--multimapping ' + 0} \ - --paired-ends ${sep=' ' paired_ends} \ - --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ - --pipeline-type ${pipeline_type} \ - --aligner ${aligner} \ - ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ - --peak-caller ${peak_caller} \ - ${'--cap-num-peak ' + cap_num_peak} \ - --idr-thresh ${idr_thresh} \ - --pval-thresh ${pval_thresh} \ - --xcor-trim-bp ${xcor_trim_bp} \ - --xcor-subsample-reads ${xcor_subsample_reads} \ - --samstat-qcs ${sep='_:_' samstat_qcs} \ - --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ - --dup-qcs ${sep='_:_' dup_qcs} \ - --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ - --xcor-plots ${sep='_:_' xcor_plots} \ - --xcor-scores ${sep='_:_' xcor_scores} \ - --idr-plots ${sep='_:_' idr_plots} \ - --idr-plots-pr ${sep='_:_' idr_plots_pr} \ - --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ - --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ - --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ - --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ - ${'--jsd-plot ' + jsd_plot} \ - --jsd-qcs ${sep='_:_' jsd_qcs} \ - ${'--idr-plot-ppr ' + idr_plot_ppr} \ - --frip-qcs ${sep='_:_' frip_qcs} \ - --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ - --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ - ${'--frip-qc-pooled ' + frip_qc_pooled} \ - ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ - ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ - --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ - --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ - ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ - --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ - --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ - ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ - ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ - ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ - --gc-plots ${sep='_:_' gc_plots} \ - --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ - --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ - --num-peak-qcs ${sep='_:_' num_peak_qcs} \ - ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ - ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ - ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ - ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ - ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ - ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ - --out-qc-html qc.html \ - --out-qc-json qc.json \ - ${'--qc-json-ref ' + qc_json_ref} - } - output { - File report = glob('*qc.html')[0] - File qc_json = glob('*qc.json')[0] - Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' - } - runtime { - cpu : 1 - memory : '4 GB' - time : 4 - disks : 'local-disk 50 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -### workflow system tasks -task read_genome_tsv { - input { - File? genome_tsv - String? 
null_s - - RuntimeEnvironment runtime_environment - } - command <<< - echo "$(basename ~{genome_tsv})" > genome_name - # create empty files for all entries - touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 - touch mito_chr_name - touch regex_bfilt_peak_chr_name - - python <<CODE - import os - with open('~{genome_tsv}', 'r') as fp: - for line in fp: - arr = line.strip('\n').split('\t') - if arr: - key, val = arr - with open(key, 'w') as fp2: - fp2.write(val) - CODE - >>> - output { - String? genome_name = read_string('genome_name') - String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa') - String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar') - String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar') - String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz') - String? gensz = if size('gensz')==0 then null_s else read_string('gensz') - String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist') - String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2') - String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name') - String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+' - else read_string('regex_bfilt_peak_chr_name') - } - runtime { - maxRetries : 0 - cpu : 1 - memory : '2 GB' - time : 4 - disks : 'local-disk 10 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task rounded_mean { - input { - Array[Int] ints - - RuntimeEnvironment runtime_environment - } - command <<< - python <<CODE - arr = [~{sep=',' ints}] - with open('tmp.txt', 'w') as fp: - if len(arr): - sum_ = sum(arr) - mean_ = sum_ / float(len(arr)) - fp.write('{}'.format(int(round(mean_)))) - else: - fp.write('0') - CODE - >>> - output { - Int rounded_mean = read_int('tmp.txt') - } - runtime { - cpu : 1 - memory : '2 GB' - time : 4 - disks : 'local-disk 10 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} - -task raise_exception { - input { - String msg - - RuntimeEnvironment runtime_environment - } - command { - echo -e "\n* Error: ${msg}\n" >&2 - exit 2 - } - output { - String error_msg = '${msg}' - } - runtime { - maxRetries : 0 - cpu : 1 - memory : '2 GB' - time : 4 - disks : 'local-disk 10 SSD' - - docker : runtime_environment.docker - singularity : runtime_environment.singularity - conda : runtime_environment.conda - } -} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl b/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl deleted file mode 100644 index 281e31443..000000000 --- a/backup/wdl-format-old/tests/format/clays_complex_script/source.formatted.wdl +++ /dev/null @@ -1,7 +0,0 @@ -## # Header -# regular comment will be left as is -## part of preamble - -#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing -#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput -version 1.2 diff --git a/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl b/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl deleted file mode 100644 index 81faa4fa6..000000000 --- a/backup/wdl-format-old/tests/format/clays_complex_script/source.wdl +++ /dev/null @@ -1,165 +0,0 @@ -## # Header -# regular comment will be left as is -#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing -#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput - -## part of preamble -version 1.2 - -#@ except: MissingMetas -struct AStruct { - String member -} - -task a_task { - meta - # Here is a comment between `meta` and the parenthesis.
- { - # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. - { - # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - input - # Here is a comment before the input. - { - Object an_object - String a_string - Boolean a_boolean - Int an_integer - Float a_float - AStruct a_struct # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - } - - command <<< >>> - - output - # Here is a comment before the output. - { - Object some_other_object = {} - String some_other_string = "foo bar baz" - Boolean some_other_boolean = true - Int some_other_integer = 42 - Float some_other_float = 0e3 - # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - AStruct some_other_struct = AStruct {} - } - - requirements - # This is a comment before the requirements. - { - container: "ubuntu:latest" - } - - hints { - max_cpu: 1 - } -} - -## These double-pound-sign comments -## should be converted to single-pound-sign comments. -workflow hello { - meta - # Here is a comment between `meta` and the parenthesis. - { - # Here is a comment within `meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. - { - # Here is a comment within `parameter_meta`. - an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" - a_true: true - a_false: false - an_integer: 42 - a_float: -0.0e123 - an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment. - an_object: { - subkey_one: "a", - subkey_two: 73, - subkey_three: true, - subkey_four: false, - } - an_undefined_value: null - } - - input { - Object an_object - String a_string - Boolean a_boolean - Int an_integer - Float a_float - AStruct a_struct # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. - } - - call a_task { - } - - scatter (name in name_array) { - call say_task { greeting = greeting } - } - - if (some_condition_task) { - call a_task as task_two {} - } - - output - # Here is a comment before the output. - { - Object some_other_object = {} - String some_other_string = "foo bar baz" - Boolean some_other_boolean = true - Int some_other_integer = 42 - Float some_other_float = 0e3 - # This should not be higlighted, as it's not known within - # the TextMate language that it's a custom struct. 
- AStruct some_other_struct = AStruct {} - } -} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl deleted file mode 100644 index 7c8de0324..000000000 --- a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.formatted.wdl +++ /dev/null @@ -1 +0,0 @@ -version 1.0 diff --git a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl b/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl deleted file mode 100644 index 7e3333f0a..000000000 --- a/backup/wdl-format-old/tests/format/complex_meta_and_calls/source.wdl +++ /dev/null @@ -1,106 +0,0 @@ -version -1.0 -workflow -test_wf -{ -input -{ -SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { -noncanonical_motifs: 30, -GT_AG_and_CT_AC_motif: 12, -} -} -parameter_meta -{ -out_sj_filter_overhang_min: { -type: "SpliceJunctionMotifs", -label: "Minimum overhang required to support a splicing junction" -} -} -output -{ -SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min -String a = "friend" -Int b = 1 + 2 -String c = "Hello, ~{a}" -Map[String, Int] d = { "a": 0, "b": 1, "c": 2} -} -meta { -a: "hello" -b: 'world' -c: 5 -d: -0xf -e: 1.0e10 -f: -2. -g: true -h: false -i: null -j: { -a: [1, 2, 3], -b: ["hello", "world", "!"], -c: { -x: 1, -y: 2, -z: 3 -} -} -k: [ -{ -a: {}, -b: 0, -c: "", -d: '', -e: [], -}, -{ -x: [1.0, 2.0, 3.0] -} -] -} -call no_params -call with_params { input: a, b, c, d = 1 } -call qualified.name -call qualified.name { input: a = 1, b = 2, c = "3" } -call aliased as x -call aliased as x { input: } -call f after x after y -call f after x after y { input: a = [] } -call f as x after x -call f as x after x after y { input: name = "hello" } -call test_task as foo { -input: bowchicka = "wowwow" -} -if ( -true -) { - -call test_task after foo { -input: bowchicka = "bowchicka" -} -scatter (i in range(3)) { -call test_task as bar { -input: bowchicka = i * 42 -} -} -} - -} -task -test_task -{ -command <<<>>> -input { -String bowchicka -} -parameter_meta { -bowchicka: { -type: "String", -label: "Bowchicka" -} -} -} - -struct SpliceJunctionMotifs { -Int noncanonical_motifs -Int GT_AG_and_CT_AC_motif -} diff --git a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl deleted file mode 100644 index 6a2d1da70..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.formatted.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 - -# fileA 1.1 -import # fileA 1.2 - # fileA 2.1 - # fileA 2.2 - "fileA.wdl" # fileA 2.3 - # fileA 3.1 - as # fileA 3.2 - # fileA 4.1 - bar # fileA 4.2 - # fileA 5.1 - alias # fileA 5.2 - # fileA 6.1 - qux # fileA 6.2 - # fileA 7.1 - as # fileA 7.2 - # fileA 8.1 - Qux # fileA 8.2 -# this comment belongs to fileB -import "fileB.wdl" as foo # also fileB -# this comment belongs to fileC -import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl deleted file mode 100644 index 1c32809f6..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_both_comments/source.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 -# this comment belongs to fileB -import "fileB.wdl" as foo # also fileB -# fileA 1.1 -import # fileA 1.2 -# fileA 2.1 -# fileA 
2.2 -"fileA.wdl" # fileA 2.3 -# fileA 3.1 -as # fileA 3.2 -# fileA 4.1 -bar # fileA 4.2 -# fileA 5.1 -alias # fileA 5.2 -# fileA 6.1 -qux # fileA 6.2 -# fileA 7.1 -as # fileA 7.2 -# fileA 8.1 -Qux # fileA 8.2 -workflow test {} -# this comment belongs to fileC -import "fileC.wdl" # also fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl deleted file mode 100644 index e23115af6..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.formatted.wdl +++ /dev/null @@ -1,12 +0,0 @@ -version 1.0 - -import # fileA 1 - "fileA.wdl" # fileA 2 - as # fileA 3 - bar # fileA 4 - alias # fileA 5 - qux # fileA 6 - as # fileA 7 - Qux # fileA 8 -import "fileB.wdl" as foo # fileB -import "fileC.wdl" # fileC diff --git a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl deleted file mode 100644 index f633e72d8..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_inline_comments/source.wdl +++ /dev/null @@ -1,12 +0,0 @@ -version 1.0 -import "fileB.wdl" as foo # fileB -workflow test {} -import "fileC.wdl" # fileC -import # fileA 1 -"fileA.wdl" # fileA 2 -as # fileA 3 -bar # fileA 4 -alias # fileA 5 -qux # fileA 6 -as # fileA 7 -Qux # fileA 8 diff --git a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl deleted file mode 100644 index 564a6c05f..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.formatted.wdl +++ /dev/null @@ -1,5 +0,0 @@ -version 1.1 - -import "fileA.wdl" as bar alias cows as horses alias cats as dogs -import "fileB.wdl" as foo -import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl deleted file mode 100644 index e69a1a727..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_no_comments/source.wdl +++ /dev/null @@ -1,7 +0,0 @@ - version 1.1 - - import "fileB.wdl" as foo - import "fileA.wdl" as bar alias cows as horses - alias cats as dogs - workflow test {} - import "fileC.wdl" alias qux as Qux diff --git a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl deleted file mode 100644 index 8b07048e9..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.formatted.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 - -# fileA 1 -import - # fileA 2.1 - # fileA 2.2 - "fileA.wdl" - # fileA 3 - as - # fileA 4 - bar - # fileA 5 - alias - # fileA 6 - qux - # fileA 7 - as - # fileA 8 - Qux -# this comment belongs to fileB -import "fileB.wdl" as foo -# this comment belongs to fileC -import "fileC.wdl" diff --git a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl b/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl deleted file mode 100644 index a27e7a4fc..000000000 --- a/backup/wdl-format-old/tests/format/imports_with_preceding_comments/source.wdl +++ /dev/null @@ -1,23 +0,0 @@ -version 1.1 -workflow test {} -# this comment belongs to fileC -import "fileC.wdl" -# this comment belongs to fileB -import "fileB.wdl" as foo -# 
fileA 1 -import -# fileA 2.1 -# fileA 2.2 -"fileA.wdl" -# fileA 3 -as -# fileA 4 -bar -# fileA 5 -alias -# fileA 6 -qux -# fileA 7 -as -# fileA 8 -Qux diff --git a/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl b/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl deleted file mode 100644 index 6cd003333..000000000 --- a/backup/wdl-format-old/tests/format/interrupt_example/source.formatted.wdl +++ /dev/null @@ -1,2 +0,0 @@ -version # interrupt - 1.2 # how far should '1.2' be indented? diff --git a/backup/wdl-format-old/tests/format/interrupt_example/source.wdl b/backup/wdl-format-old/tests/format/interrupt_example/source.wdl deleted file mode 100644 index 30e667287..000000000 --- a/backup/wdl-format-old/tests/format/interrupt_example/source.wdl +++ /dev/null @@ -1,10 +0,0 @@ -version # interrupt -1.2 # how far should '1.2' be indented? - -workflow -# interrupt -test # should this be indented? -# interrupt -{ meta # interrupt -{ # how far should this bracket be indented? -}} \ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt b/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt deleted file mode 100644 index 335221306..000000000 --- a/backup/wdl-format-old/tests/format/seaseq-case/LICENSE.txt +++ /dev/null @@ -1,205 +0,0 @@ -'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl -on the date 08-05-2024. -It was accompanied by the following license: - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl b/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl deleted file mode 100644 index a25a05879..000000000 --- a/backup/wdl-format-old/tests/format/seaseq-case/source.formatted.wdl +++ /dev/null @@ -1,17 +0,0 @@ -version 1.0 - -import "workflows/tasks/bedtools.wdl" -import "workflows/tasks/bowtie.wdl" -import "workflows/tasks/fastqc.wdl" -import "workflows/tasks/macs.wdl" -import "workflows/tasks/rose.wdl" -import "workflows/tasks/runspp.wdl" -import "workflows/tasks/samtools.wdl" -import "workflows/tasks/seaseq_util.wdl" as util -import "workflows/tasks/sicer.wdl" -import "workflows/tasks/sortbed.wdl" -import "workflows/tasks/sratoolkit.wdl" as sra -import "workflows/workflows/bamtogff.wdl" -import "workflows/workflows/mapping.wdl" -import "workflows/workflows/motifs.wdl" -import "workflows/workflows/visualization.wdl" as viz diff --git a/backup/wdl-format-old/tests/format/seaseq-case/source.wdl b/backup/wdl-format-old/tests/format/seaseq-case/source.wdl deleted file mode 100644 index 94c76656e..000000000 --- a/backup/wdl-format-old/tests/format/seaseq-case/source.wdl +++ /dev/null @@ -1,898 +0,0 @@ -version 1.0 -import "workflows/tasks/fastqc.wdl" -import "workflows/tasks/bedtools.wdl" -import "workflows/tasks/bowtie.wdl" -import "workflows/tasks/samtools.wdl" -import "workflows/tasks/macs.wdl" -import "workflows/workflows/bamtogff.wdl" -import "workflows/tasks/sicer.wdl" -import "workflows/workflows/motifs.wdl" -import "workflows/tasks/rose.wdl" -import "workflows/tasks/seaseq_util.wdl" as util -import "workflows/workflows/visualization.wdl" as viz -import "workflows/workflows/mapping.wdl" -import "workflows/tasks/runspp.wdl" -import "workflows/tasks/sortbed.wdl" -import "workflows/tasks/sratoolkit.wdl" as sra - -workflow seaseq { - String pipeline_ver = 'v2.0.0' - - meta { - title: 'SEAseq Analysis' - summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline' - description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.' - version: '2.0.0' - details: { - citation: 'https://doi.org/10.1186/s12859-022-04588-z', - contactEmail: 'modupeore.adetunji@stjude.org', - contactOrg: "St Jude Children's Research Hospital", - contactUrl: "", - upstreamLicenses: "MIT", - upstreamUrl: 'https://github.com/stjude/seaseq', - whatsNew: [ - { - version: "2.0", - changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"] - } - ] - } - parameter_group: { - reference_genome: { - title: 'Reference genome', - description: 'Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .', - help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.' - }, - input_genomic_data: { - title: 'Input FASTQ data', - description: 'Genomic input files for experiment.', - help: 'Input one or more sample data and/or SRA identifiers.' - }, - analysis_parameter: { - title: 'Analysis parameter', - description: 'Analysis settings needed for experiment.', - help: 'Analysis settings; such output analysis file name.' - } - } - } - input { - # group: reference_genome - File reference - File? spikein_reference - File? blacklist - File gtf - Array[File]? bowtie_index - Array[File]? spikein_bowtie_index - Array[File]? motif_databases - - # group: input_genomic_data - Array[String]? 
sample_sraid - Array[File]? sample_fastq - - # group: analysis_parameter - String? results_name - Boolean run_motifs=true - - } - - parameter_meta { - reference: { - description: 'Reference FASTA file', - group: 'reference_genome', - patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] - } - blacklist: { - description: 'Blacklist file in BED format', - group: 'reference_genome', - help: 'If defined, blacklist regions listed are excluded after reference alignment.', - patterns: ["*.bed", "*.bed.gz"] - } - gtf: { - description: 'gene annotation file (.gtf)', - group: 'reference_genome', - help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', - patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] - } - bowtie_index: { - description: 'bowtie v1 index files (*.ebwt)', - group: 'reference_genome', - help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', - patterns: ["*.ebwt"] - } - motif_databases: { - description: 'One or more of the MEME suite motif databases (*.meme)', - group: 'reference_genome', - help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', - patterns: ["*.meme"] - } - sample_sraid: { - description: 'One or more sample SRA (Sequence Read Archive) run identifiers', - group: 'input_genomic_data', - help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', - example: 'SRR12345678' - } - sample_fastq: { - description: 'One or more sample FASTQs', - group: 'input_genomic_data', - help: 'Upload zipped FASTQ files.', - patterns: ["*.fq.gz", "*.fastq.gz"] - } - results_name: { - description: 'Experiment results custom name', - group: 'analysis_parameter', - help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', - example: 'AllMerge_mapped' - } - run_motifs: { - description: 'Perform Motif Analysis', - group: 'analysis_parameter', - help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', - example: true - } - } - -### ---------------------------------------- ### -### ------------ S E C T I O N 1 ----------- ### -### ------ Pre-process Analysis Files ------ ### -### ---------------------------------------- ### - - # Process SRRs - if ( defined(sample_sraid) ) { - # Download sample file(s) from SRA database - # outputs: - # fastqdump.fastqfile : downloaded sample files in fastq.gz format - Array[String] string_sra = [1] #buffer to allow for sra_id optionality - Array[String] s_sraid = select_first([sample_sraid, string_sra]) - scatter (eachsra in s_sraid) { - call sra.fastqdump { - input : - sra_id=eachsra, - cloud=false - } - } # end scatter each sra - - Array[File] sample_srafile = flatten(fastqdump.fastqfile) - } # end if sample_sraid - - # Generating INDEX files - #1. Bowtie INDEX files if not provided - if ( !defined(bowtie_index) ) { - # create bowtie index when not provided - call bowtie.index as bowtie_idx { - input : - reference=reference - } - } - #2. 
Make sure indexes are six else build indexes - if ( defined(bowtie_index) ) { - # check total number of bowtie indexes provided - Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality - Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) - if ( length(int_bowtie_index) != 6 ) { - # create bowtie index if 6 index files aren't provided - call bowtie.index as bowtie_idx_2 { - input : - reference=reference - } - } - } - Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) - - # Spike-in DNA - #3. Bowtie INDEX files if not provided - String string_spikein = "1" - Array[String] string_spikein_buffer = [1] - if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { - # create bowtie index on spikein genome - call bowtie.index as spikein_bowtie_idx { - input : - reference=select_first([spikein_reference, string_spikein]) - } - } - - #4. Make sure indexes are six else build indexes for Spike-in DNA - if ( defined(spikein_bowtie_index) ) { - # check total number of bowtie indexes provided - Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) - if ( length(int_spikein_bowtie_index) != 6 ) { - # create bowtie index if 6 index files aren't provided - call bowtie.index as spikein_bowtie_idx_2 { - input : - reference=select_first([spikein_reference, string_spikein]) - } - } - } - Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) - - # FASTA faidx and chromsizes and effective genome size - call samtools.faidx as samtools_faidx { - # create FASTA index and chrom sizes files - input : - reference=reference - } - call util.effective_genome_size as egs { - # effective genome size for FASTA - input : - reference=reference - } - - # Process FASTQs - if ( defined(sample_fastq) ) { - - Array[String] string_fastq = [1] #buffer to allow for fastq optionality - Array[File] s_fastq = select_first([sample_fastq, string_fastq]) - - Array[File] sample_fastqfile = s_fastq - } - Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) - -### ------------------------------------------------- ### -### ---------------- S E C T I O N 1 ---------------- ### -### ----------- B: remove Spike-IN reads ------------ ### -### ------------------------------------------------- ### - - # if multiple fastqfiles are provided - Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false - Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false - - if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { - scatter (eachfastq in original_fastqfiles) { - call fastqc.fastqc as spikein_indv_fastqc { - input : - inputfile=eachfastq, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' - } - call util.basicfastqstats as spikein_indv_bfs { - input : - fastqfile=eachfastq, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' - } - call bowtie.spikein_SE as spikein_indv_map { - input : - fastqfile=eachfastq, - index_files=actual_spikein_bowtie_index, - 
metricsfile=spikein_indv_bfs.metrics_out, - default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' - } - } - - Array[File] spikein_fastqfiles = spikein_indv_map.unaligned - } - Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) - -### ------------------------------------------------- ### -### ---------------- S E C T I O N 2 ---------------- ### -### ---- A: analysis if multiple FASTQs provided ---- ### -### ------------------------------------------------- ### - - if ( multi_fastq ) { - scatter (eachfastq in fastqfiles) { - # Execute analysis on each fastq file provided - # Analysis executed: - # FastQC - # FASTQ read length distribution - # Reference Alignment using Bowtie (-k2 -m2) - # Convert SAM to BAM - # FastQC on BAM files - # Remove Blacklists (if provided) - # Remove read duplicates - # Summary statistics on FASTQs - # Combine html files into one for easy viewing - - call fastqc.fastqc as indv_fastqc { - input : - inputfile=eachfastq, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call util.basicfastqstats as indv_bfs { - input : - fastqfile=eachfastq, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - - call mapping.mapping as indv_mapping { - input : - fastqfile=eachfastq, - index_files=actual_bowtie_index, - metricsfile=indv_bfs.metrics_out, - blacklist=blacklist, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' - } - - call fastqc.fastqc as indv_bamfqc { - input : - inputfile=indv_mapping.sorted_bam, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call runspp.runspp as indv_runspp { - input: - bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) - } - - call bedtools.bamtobed as indv_bamtobed { - input: - bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) - } - - call util.evalstats as indv_summarystats { - input: - fastq_type="SEAseq Sample FASTQ", - bambed=indv_bamtobed.bedfile, - sppfile=indv_runspp.spp_out, - fastqczip=indv_fastqc.zipfile, - bamflag=indv_mapping.bam_stats, - rmdupflag=indv_mapping.mkdup_stats, - bkflag=indv_mapping.bklist_stats, - fastqmetrics=indv_bfs.metrics_out, - default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - } # end scatter (for each sample fastq) - - # MERGE BAM FILES - # Execute analysis on merge bam file - # Analysis executed: - # Merge BAM (if more than 1 fastq is provided) - # FastQC on Merge BAM (AllMerge__mapped) - - # merge bam files and perform FastQC if more than one is provided - call util.mergehtml { - input: - htmlfiles=indv_summarystats.xhtml, - txtfiles=indv_summarystats.textfile, - default_location='SAMPLE', - outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' - } - - call samtools.mergebam { - input: - bamfiles=indv_mapping.sorted_bam, - metricsfiles=indv_bfs.metrics_out, - default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', - outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' - } - - call fastqc.fastqc as mergebamfqc { - input: -
inputfile=mergebam.mergebam, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' - } - - call samtools.indexstats as mergeindexstats { - input: - bamfile=mergebam.mergebam, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' - } - - if ( defined(blacklist) ) { - # remove blacklist regions - String string_blacklist = "" #buffer to allow for blacklist optionality - File blacklist_file = select_first([blacklist, string_blacklist]) - call bedtools.intersect as merge_rmblklist { - input : - fileA=mergebam.mergebam, - fileB=blacklist_file, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', - nooverlap=true - } - call samtools.indexstats as merge_bklist { - input : - bamfile=merge_rmblklist.intersect_out, - default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' - } - } # end if blacklist provided - - File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) - - call samtools.markdup as merge_markdup { - input : - bamfile=mergebam_afterbklist, - default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' - } - - call samtools.indexstats as merge_mkdup { - input : - bamfile=merge_markdup.mkdupbam, - default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' - } - } # end if length(fastqfiles) > 1: multi_fastq - -### ---------------------------------------- ### -### ------------ S E C T I O N 2 ----------- ### -### -- B: analysis if one FASTQ provided --- ### -### ---------------------------------------- ### - - # if only one fastqfile is provided - if ( one_fastq ) { - # Execute analysis on each fastq file provided - # Analysis executed: - # FastQC - # FASTQ read length distribution - # Reference Alignment using Bowtie (-k2 -m2) - # Convert SAM to BAM - # FastQC on BAM files - # Remove Blacklists (if provided) - # Remove read duplicates - # Summary statistics on FASTQs - # Combine html files into one for easy viewing - - call fastqc.fastqc as uno_fastqc { - input : - inputfile=fastqfiles[0], - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call util.basicfastqstats as uno_bfs { - input : - fastqfile=fastqfiles[0], - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' - } - - call mapping.mapping { - input : - fastqfile=fastqfiles[0], - index_files=actual_bowtie_index, - metricsfile=uno_bfs.metrics_out, - blacklist=blacklist, - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' - } - - call fastqc.fastqc as uno_bamfqc { - input : - inputfile=mapping.sorted_bam, - default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' - } - - call runspp.runspp as uno_runspp { - input: - bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) - } - - call bedtools.bamtobed as uno_bamtobed { - input: - bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) - } - } # end if length(fastqfiles) == 1: one_fastq - -### ---------------------------------------- ### -### ------------ S E C T I O N 3 ----------- ### -### ----------- ChIP-seq analysis ---------- ### -### ---------------------------------------- ### - - # ChIP-seq and downstream analysis - # Execute analysis on merge bam file - # Analysis executed: - # FIRST: Check if reads are mapped - # Peaks identification (SICER, MACS, ROSE) - # Motif analysis - # Complete Summary statistics - - #collate 
correct files for downstream analysis - File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) - - call macs.macs { - input : - bamfile=sample_bam, - pvalue="1e-9", - keep_dup="auto", - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' - } - - call util.addreadme { - input : - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' - } - - call macs.macs as all { - input : - bamfile=sample_bam, - pvalue="1e-9", - keep_dup="all", - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' - } - - call macs.macs as nomodel { - input : - bamfile=sample_bam, - nomodel=true, - egs=egs.genomesize, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' - } - - call bamtogff.bamtogff { - input : - gtffile=gtf, - chromsizes=samtools_faidx.chromsizes, - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), - bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' - } - - call bedtools.bamtobed as forsicerbed { - input : - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) - } - - call sicer.sicer { - input : - bedfile=forsicerbed.bedfile, - chromsizes=samtools_faidx.chromsizes, - genome_fraction=egs.genomefraction, - fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', - coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' - } - - call rose.rose { - input : - gtffile=gtf, - bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), - bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), - bedfile_auto=macs.peakbedfile, - bedfile_all=all.peakbedfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' - } - - call runspp.runspp { - input: - bamfile=sample_bam - } - - call util.peaksanno { - input : - gtffile=gtf, - bedfile=macs.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=macs.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as all_peaksanno { - input : - gtffile=gtf, - bedfile=all.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=all.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as nomodel_peaksanno { - input : - gtffile=gtf, - bedfile=nomodel.peakbedfile, - chromsizes=samtools_faidx.chromsizes, - summitfile=nomodel.summitsfile, - 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') - } - - call util.peaksanno as sicer_peaksanno { - input : - gtffile=gtf, - bedfile=sicer.scoreisland, - chromsizes=samtools_faidx.chromsizes, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' - } - - # Motif Analysis - if (run_motifs) { - call motifs.motifs { - input: - reference=reference, - reference_index=samtools_faidx.faidx_file, - bedfile=macs.peakbedfile, - motif_databases=motif_databases, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - - call util.flankbed { - input : - bedfile=macs.summitsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - - call motifs.motifs as flank { - input: - reference=reference, - reference_index=samtools_faidx.faidx_file, - bedfile=flankbed.flankbedfile, - motif_databases=motif_databases, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' - } - } - - call viz.visualization { - input: - wigfile=macs.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=macs.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as vizall { - input: - wigfile=all.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=all.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as viznomodel { - input: - wigfile=nomodel.wigfile, - chromsizes=samtools_faidx.chromsizes, - xlsfile=nomodel.peakxlsfile, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') - } - - call viz.visualization as vizsicer { - input: - wigfile=sicer.wigfile, - chromsizes=samtools_faidx.chromsizes, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' - } - - call bedtools.bamtobed as finalbed { - input: - bamfile=sample_bam - } - - call sortbed.sortbed { - input: - bedfile=finalbed.bedfile - } - - call bedtools.intersect { - input: - fileA=macs.peakbedfile, - fileB=sortbed.sortbed_out, - countoverlap=true, - sorted=true - } - -### ---------------------------------------- ### -### ------------ S E C T I O N 4 ----------- ### -### ---------- Summary Statistics ---------- ### -### ---------------------------------------- ### - - String string_qual = "" #buffer to allow for optionality in if statement - - #SUMMARY STATISTICS - if ( one_fastq ) { - call util.evalstats as uno_summarystats { - # SUMMARY STATISTICS of sample file (only 1 sample file provided) - input: - fastq_type="SEAseq Sample FASTQ", - bambed=finalbed.bedfile, - sppfile=runspp.spp_out, - fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), - bamflag=mapping.bam_stats, - rmdupflag=mapping.mkdup_stats, - bkflag=mapping.bklist_stats, - fastqmetrics=uno_bfs.metrics_out, - countsfile=intersect.intersect_out, - peaksxls=macs.peakxlsfile, - enhancers=rose.enhancers, - superenhancers=rose.super_enhancers, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' - } - - call util.summaryreport as uno_overallsummary { - # Presenting all quality stats for the analysis - input: - 
overallqc_html=uno_summarystats.xhtml, - overallqc_txt=uno_summarystats.textfile - } - } # end if one_fastq - - if ( multi_fastq ) { - call util.evalstats as merge_summarystats { - # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) - input: - fastq_type="SEAseq Comprehensive", - bambed=finalbed.bedfile, - sppfile=runspp.spp_out, - fastqczip=select_first([mergebamfqc.zipfile, string_qual]), - bamflag=mergeindexstats.flagstats, - rmdupflag=merge_mkdup.flagstats, - bkflag=merge_bklist.flagstats, - countsfile=intersect.intersect_out, - peaksxls=macs.peakxlsfile, - enhancers=rose.enhancers, - superenhancers=rose.super_enhancers, - default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' - } - - call util.summaryreport as merge_overallsummary { - # Presenting all quality stats for the analysis - input: - sampleqc_html=mergehtml.xhtml, - overallqc_html=merge_summarystats.xhtml, - sampleqc_txt=mergehtml.mergetxt, - overallqc_txt=merge_summarystats.textfile - } - } # end if multi_fastq - - output { - #SPIKE-IN - Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile - Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile - Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output - - #FASTQC - Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile - Array[File?]? indv_s_zipfile = indv_fastqc.zipfile - Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile - Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile - - File? s_mergebam_htmlfile = mergebamfqc.htmlfile - File? s_mergebam_zipfile = mergebamfqc.zipfile - - File? uno_s_htmlfile = uno_fastqc.htmlfile - File? uno_s_zipfile = uno_fastqc.zipfile - File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile - File? uno_s_bam_zipfile = uno_bamfqc.zipfile - - #BASICMETRICS - Array[File?]? s_metrics_out = indv_bfs.metrics_out - File? uno_s_metrics_out = uno_bfs.metrics_out - - #BAMFILES - Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam - Array[File?]? indv_s_indexbam = indv_mapping.bam_index - Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam - Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index - Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam - Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index - - File? uno_s_sortedbam = mapping.sorted_bam - File? uno_s_indexstatsbam = mapping.bam_index - File? uno_s_bkbam = mapping.bklist_bam - File? uno_s_bkindexbam = mapping.bklist_index - File? uno_s_rmbam = mapping.mkdup_bam - File? uno_s_rmindexbam = mapping.mkdup_index - - File? s_mergebamfile = mergebam.mergebam - File? s_mergebamindex = mergeindexstats.indexbam - File? s_bkbam = merge_rmblklist.intersect_out - File? s_bkindexbam = merge_bklist.indexbam - File? s_rmbam = merge_markdup.mkdupbam - File? s_rmindexbam = merge_mkdup.indexbam - - #MACS - File? peakbedfile = macs.peakbedfile - File? peakxlsfile = macs.peakxlsfile - File? summitsfile = macs.summitsfile - File? negativexlsfile = macs.negativepeaks - File? wigfile = macs.wigfile - File? all_peakbedfile = all.peakbedfile - File? all_peakxlsfile = all.peakxlsfile - File? all_summitsfile = all.summitsfile - File? all_wigfile = all.wigfile - File? all_negativexlsfile = all.negativepeaks - File? nm_peakbedfile = nomodel.peakbedfile - File? nm_peakxlsfile = nomodel.peakxlsfile - File? nm_summitsfile = nomodel.summitsfile - File? nm_wigfile = nomodel.wigfile - File? nm_negativexlsfile = nomodel.negativepeaks - File? readme_peaks = addreadme.readme_peaks - - #SICER - File? 
scoreisland = sicer.scoreisland - File? sicer_wigfile = sicer.wigfile - - #ROSE - File? pngfile = rose.pngfile - File? mapped_union = rose.mapped_union - File? mapped_stitch = rose.mapped_stitch - File? enhancers = rose.enhancers - File? super_enhancers = rose.super_enhancers - File? gff_file = rose.gff_file - File? gff_union = rose.gff_union - File? union_enhancers = rose.union_enhancers - File? stitch_enhancers = rose.stitch_enhancers - File? e_to_g_enhancers = rose.e_to_g_enhancers - File? g_to_e_enhancers = rose.g_to_e_enhancers - File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers - File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers - File? supergenes = rose.super_genes - File? allgenes = rose.all_genes - - #MOTIFS - File? flankbedfile = flankbed.flankbedfile - - File? ame_tsv = motifs.ame_tsv - File? ame_html = motifs.ame_html - File? ame_seq = motifs.ame_seq - File? meme = motifs.meme_out - File? meme_summary = motifs.meme_summary - - File? summit_ame_tsv = flank.ame_tsv - File? summit_ame_html = flank.ame_html - File? summit_ame_seq = flank.ame_seq - File? summit_meme = flank.meme_out - File? summit_meme_summary = flank.meme_summary - - #BAM2GFF - File? s_matrices = bamtogff.s_matrices - File? densityplot = bamtogff.densityplot - File? pdf_gene = bamtogff.pdf_gene - File? pdf_h_gene = bamtogff.pdf_h_gene - File? png_h_gene = bamtogff.png_h_gene - File? jpg_h_gene = bamtogff.jpg_h_gene - File? pdf_promoters = bamtogff.pdf_promoters - File? pdf_h_promoters = bamtogff.pdf_h_promoters - File? png_h_promoters = bamtogff.png_h_promoters - File? jpg_h_promoters = bamtogff.jpg_h_promoters - - #PEAKS-ANNOTATION - File? peak_promoters = peaksanno.peak_promoters - File? peak_genebody = peaksanno.peak_genebody - File? peak_window = peaksanno.peak_window - File? peak_closest = peaksanno.peak_closest - File? peak_comparison = peaksanno.peak_comparison - File? gene_comparison = peaksanno.gene_comparison - File? pdf_comparison = peaksanno.pdf_comparison - - File? all_peak_promoters = all_peaksanno.peak_promoters - File? all_peak_genebody = all_peaksanno.peak_genebody - File? all_peak_window = all_peaksanno.peak_window - File? all_peak_closest = all_peaksanno.peak_closest - File? all_peak_comparison = all_peaksanno.peak_comparison - File? all_gene_comparison = all_peaksanno.gene_comparison - File? all_pdf_comparison = all_peaksanno.pdf_comparison - - File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters - File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody - File? nomodel_peak_window = nomodel_peaksanno.peak_window - File? nomodel_peak_closest = nomodel_peaksanno.peak_closest - File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison - File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison - File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison - - File? sicer_peak_promoters = sicer_peaksanno.peak_promoters - File? sicer_peak_genebody = sicer_peaksanno.peak_genebody - File? sicer_peak_window = sicer_peaksanno.peak_window - File? sicer_peak_closest = sicer_peaksanno.peak_closest - File? sicer_peak_comparison = sicer_peaksanno.peak_comparison - File? sicer_gene_comparison = sicer_peaksanno.gene_comparison - File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison - - #VISUALIZATION - File? bigwig = visualization.bigwig - File? norm_wig = visualization.norm_wig - File? tdffile = visualization.tdffile - File? n_bigwig = viznomodel.bigwig - File? n_norm_wig = viznomodel.norm_wig - File? n_tdffile = viznomodel.tdffile - File? 
a_bigwig = vizall.bigwig - File? a_norm_wig = vizall.norm_wig - File? a_tdffile = vizall.tdffile - - File? s_bigwig = vizsicer.bigwig - File? s_norm_wig = vizsicer.norm_wig - File? s_tdffile = vizsicer.tdffile - - #QC-STATS - Array[File?]? s_qc_statsfile = indv_summarystats.statsfile - Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile - Array[File?]? s_qc_textfile = indv_summarystats.textfile - File? s_qc_mergehtml = mergehtml.mergefile - - File? s_uno_statsfile = uno_summarystats.statsfile - File? s_uno_htmlfile = uno_summarystats.htmlfile - File? s_uno_textfile = uno_summarystats.textfile - - File? statsfile = merge_summarystats.statsfile - File? htmlfile = merge_summarystats.htmlfile - File? textfile = merge_summarystats.textfile - - File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) - File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) - } -} \ No newline at end of file From 72d92386c4944ee0b11dbf74c32e2cddd5a6de37 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 19:01:51 -0400 Subject: [PATCH 29/60] revise: change how if elses are handled --- wdl-format/src/v1/expr.rs | 27 +- .../source.formatted.wdl | 582 ++++++++++++------ .../if_then_else_exprs/source.formatted.wdl | 25 + .../format/if_then_else_exprs/source.wdl | 23 + .../format/seaseq-case/source.formatted.wdl | 33 +- 5 files changed, 482 insertions(+), 208 deletions(-) create mode 100644 wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl create mode 100644 wdl-format/tests/format/if_then_else_exprs/source.wdl diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index cb3c0efc0..42042f9df 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -668,7 +668,20 @@ pub fn format_parenthesized_expr(element: &FormatElement, stream: &mut TokenStre pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream) { let mut children = element.children().expect("if expr children"); - let nested_else_if = matches!(stream.last_literal_kind(), Some(SyntaxKind::ElseKeyword)); + let last = stream.last_literal_kind(); + + // Nested `if` expressions are a special case where we don't want to add parentheses + // or increment the indent level. + // Otherwise, we need to add parentheses and increment the indent if the last token is not an open parenthesis. + let nested_else_if = matches!(last, Some(SyntaxKind::ElseKeyword)); + let paren_needed = !matches!(last, Some(SyntaxKind::OpenParen)) && !nested_else_if; + + if paren_needed { + stream.push_literal("(".to_string(), SyntaxKind::OpenParen); + } + if !nested_else_if { + stream.increment_indent(); + } let if_keyword = children.next().expect("if keyword"); assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); @@ -677,13 +690,7 @@ pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream 1 + File? 
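A minimal sketch of the revised `format_if_expr` behavior (hypothetical WDL, not one of the test fixtures): a standalone `if` expression is now wrapped in parentheses and indented one extra level, while an `else if` continues the chain flat, because `nested_else_if` suppresses both the added parentheses and the indent increment:

    Int picked = (
        if a > 0
        then a
        else if b > 0
        then b
        else 0
    )
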
blacklist_ = ( + if length(blacklists) > 1 then pool_blacklist.ta_pooled - else if length(blacklists) > 0 then blacklists[0] + else if length(blacklists) > 0 + then blacklists[0] else blacklist2_ + ) String mito_chr_name_ = select_first([ mito_chr_name, read_genome_tsv.mito_chr_name, @@ -1101,10 +1110,13 @@ workflow chip { ]) ### temp vars (do not define these) - String aligner_ = if defined(custom_align_py) + String aligner_ = ( + if defined(custom_align_py) then "custom" else aligner - String peak_caller_ = if pipeline_type == "tf" + ) + String peak_caller_ = ( + if pipeline_type == "tf" then select_first([ peak_caller, "spp", @@ -1113,17 +1125,24 @@ workflow chip { peak_caller, "macs2", ]) - String peak_type_ = if peak_caller_ == "spp" + ) + String peak_type_ = ( + if peak_caller_ == "spp" then "regionPeak" else "narrowPeak" + ) Boolean enable_idr = pipeline_type == "tf" # enable_idr for TF chipseq only - String idr_rank_ = if peak_caller_ == "spp" + String idr_rank_ = ( + if peak_caller_ == "spp" then "signal.value" - else if peak_caller_ == "macs2" then "p.value" + else if peak_caller_ == "macs2" + then "p.value" else "p.value" + ) Int cap_num_peak_spp = 300000 Int cap_num_peak_macs2 = 500000 - Int cap_num_peak_ = if peak_caller_ == "spp" + Int cap_num_peak_ = ( + if peak_caller_ == "spp" then select_first([ cap_num_peak, cap_num_peak_spp, @@ -1132,38 +1151,58 @@ workflow chip { cap_num_peak, cap_num_peak_macs2, ]) + ) Int mapq_thresh_ = mapq_thresh - Boolean enable_xcor_ = if pipeline_type == "control" + Boolean enable_xcor_ = ( + if pipeline_type == "control" then false else true - Boolean enable_count_signal_track_ = if pipeline_type == "control" + ) + Boolean enable_count_signal_track_ = ( + if pipeline_type == "control" then false else enable_count_signal_track - Boolean enable_jsd_ = if pipeline_type == "control" + ) + Boolean enable_jsd_ = ( + if pipeline_type == "control" then false else enable_jsd - Boolean enable_gc_bias_ = if pipeline_type == "control" + ) + Boolean enable_gc_bias_ = ( + if pipeline_type == "control" then false else enable_gc_bias - Boolean align_only_ = if pipeline_type == "control" + ) + Boolean align_only_ = ( + if pipeline_type == "control" then true else align_only + ) - Float align_mem_factor_ = if aligner_ == "bowtie2" + Float align_mem_factor_ = ( + if aligner_ == "bowtie2" then align_bowtie2_mem_factor else align_bwa_mem_factor - Float align_disk_factor_ = if aligner_ == "bowtie2" + ) + Float align_disk_factor_ = ( + if aligner_ == "bowtie2" then align_bowtie2_disk_factor else align_bwa_disk_factor - Float call_peak_mem_factor_ = if peak_caller_ == "spp" + ) + Float call_peak_mem_factor_ = ( + if peak_caller_ == "spp" then call_peak_spp_mem_factor else call_peak_macs2_mem_factor - Float call_peak_disk_factor_ = if peak_caller_ == "spp" + ) + Float call_peak_disk_factor_ = ( + if peak_caller_ == "spp" then call_peak_spp_disk_factor else call_peak_macs2_disk_factor + ) # temporary 2-dim fastqs array [rep_id][merge_id] - Array[Array[File]] fastqs_R1 = if length(fastqs_rep10_R1) > 0 + Array[Array[File]] fastqs_R1 = ( + if length(fastqs_rep10_R1) > 0 then [ fastqs_rep1_R1, fastqs_rep2_R1, @@ -1176,7 +1215,8 @@ workflow chip { fastqs_rep9_R1, fastqs_rep10_R1, ] - else if length(fastqs_rep9_R1) > 0 then [ + else if length(fastqs_rep9_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, @@ -1187,7 +1227,8 @@ workflow chip { fastqs_rep8_R1, fastqs_rep9_R1, ] - else if length(fastqs_rep8_R1) > 0 then [ + else if length(fastqs_rep8_R1) > 0 + then 
[ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, @@ -1197,7 +1238,8 @@ workflow chip { fastqs_rep7_R1, fastqs_rep8_R1, ] - else if length(fastqs_rep7_R1) > 0 then [ + else if length(fastqs_rep7_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, @@ -1206,7 +1248,8 @@ workflow chip { fastqs_rep6_R1, fastqs_rep7_R1, ] - else if length(fastqs_rep6_R1) > 0 then [ + else if length(fastqs_rep6_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, @@ -1214,32 +1257,38 @@ workflow chip { fastqs_rep5_R1, fastqs_rep6_R1, ] - else if length(fastqs_rep5_R1) > 0 then [ + else if length(fastqs_rep5_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1, ] - else if length(fastqs_rep4_R1) > 0 then [ + else if length(fastqs_rep4_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, ] - else if length(fastqs_rep3_R1) > 0 then [ + else if length(fastqs_rep3_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, ] - else if length(fastqs_rep2_R1) > 0 then [ + else if length(fastqs_rep2_R1) > 0 + then [ fastqs_rep1_R1, fastqs_rep2_R1, ] - else if length(fastqs_rep1_R1) > 0 then [ + else if length(fastqs_rep1_R1) > 0 + then [ fastqs_rep1_R1, ] else [] + ) # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] fastqs_R2 = [ fastqs_rep1_R2, @@ -1255,7 +1304,8 @@ workflow chip { ] # temporary 2-dim ctl fastqs array [rep_id][merge_id] - Array[Array[File]] ctl_fastqs_R1 = if length(ctl_fastqs_rep10_R1) > 0 + Array[Array[File]] ctl_fastqs_R1 = ( + if length(ctl_fastqs_rep10_R1) > 0 then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, @@ -1268,7 +1318,8 @@ workflow chip { ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1, ] - else if length(ctl_fastqs_rep9_R1) > 0 then [ + else if length(ctl_fastqs_rep9_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, @@ -1279,7 +1330,8 @@ workflow chip { ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ] - else if length(ctl_fastqs_rep8_R1) > 0 then [ + else if length(ctl_fastqs_rep8_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, @@ -1289,7 +1341,8 @@ workflow chip { ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ] - else if length(ctl_fastqs_rep7_R1) > 0 then [ + else if length(ctl_fastqs_rep7_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, @@ -1298,7 +1351,8 @@ workflow chip { ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ] - else if length(ctl_fastqs_rep6_R1) > 0 then [ + else if length(ctl_fastqs_rep6_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, @@ -1306,32 +1360,38 @@ workflow chip { ctl_fastqs_rep5_R1, ctl_fastqs_rep6_R1, ] - else if length(ctl_fastqs_rep5_R1) > 0 then [ + else if length(ctl_fastqs_rep5_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1, ] - else if length(ctl_fastqs_rep4_R1) > 0 then [ + else if length(ctl_fastqs_rep4_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ] - else if length(ctl_fastqs_rep3_R1) > 0 then [ + else if length(ctl_fastqs_rep3_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ] - else if length(ctl_fastqs_rep2_R1) > 0 then [ + else if length(ctl_fastqs_rep2_R1) > 0 + then [ ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ] - else if length(ctl_fastqs_rep1_R1) > 0 then [ + else if length(ctl_fastqs_rep1_R1) > 0 + then [ ctl_fastqs_rep1_R1, ] else [] + ) # no need to do 
that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] ctl_fastqs_R2 = [ ctl_fastqs_rep1_R2, @@ -1349,31 +1409,45 @@ workflow chip { # temporary variables to get number of replicates # WDLic implementation of max(A,B,C,...) Int num_rep_fastq = length(fastqs_R1) - Int num_rep_bam = if length(bams) < num_rep_fastq + Int num_rep_bam = ( + if length(bams) < num_rep_fastq then num_rep_fastq else length(bams) - Int num_rep_nodup_bam = if length(nodup_bams) < num_rep_bam + ) + Int num_rep_nodup_bam = ( + if length(nodup_bams) < num_rep_bam then num_rep_bam else length(nodup_bams) - Int num_rep_ta = if length(tas) < num_rep_nodup_bam + ) + Int num_rep_ta = ( + if length(tas) < num_rep_nodup_bam then num_rep_nodup_bam else length(tas) - Int num_rep_peak = if length(peaks) < num_rep_ta + ) + Int num_rep_peak = ( + if length(peaks) < num_rep_ta then num_rep_ta else length(peaks) + ) Int num_rep = num_rep_peak # temporary variables to get number of controls Int num_ctl_fastq = length(ctl_fastqs_R1) - Int num_ctl_bam = if length(ctl_bams) < num_ctl_fastq + Int num_ctl_bam = ( + if length(ctl_bams) < num_ctl_fastq then num_ctl_fastq else length(ctl_bams) - Int num_ctl_nodup_bam = if length(ctl_nodup_bams) < num_ctl_bam + ) + Int num_ctl_nodup_bam = ( + if length(ctl_nodup_bams) < num_ctl_bam then num_ctl_bam else length(ctl_nodup_bams) - Int num_ctl_ta = if length(ctl_tas) < num_ctl_nodup_bam + ) + Int num_ctl_ta = ( + if length(ctl_tas) < num_ctl_nodup_bam then num_ctl_nodup_bam else length(ctl_tas) + ) Int num_ctl = num_ctl_ta # sanity check for inputs @@ -1437,21 +1511,24 @@ workflow chip { scatter (i in range(num_rep)) { # to override endedness definition for individual replicate # paired_end will override paired_ends[i] - Boolean paired_end_ = if !defined(paired_end) && i < length(paired_ends) + Boolean paired_end_ = ( + if !defined(paired_end) && i < length(paired_ends) then paired_ends[i] else select_first([ paired_end, ]) + ) Boolean has_input_of_align = i < length(fastqs_R1) && length(fastqs_R1[i]) > 0 Boolean has_output_of_align = i < length(bams) if (has_input_of_align && !has_output_of_align) { call align { input: fastqs_R1 = fastqs_R1[i], - fastqs_R2 = if paired_end_ + fastqs_R2 = ( + if paired_end_ then fastqs_R2[i] else [] - , + ), crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1459,11 +1536,13 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" + idx_tar = ( + if aligner == "bwa" then bwa_idx_tar_ - else if aligner == "bowtie2" then bowtie2_idx_tar_ + else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar - , + ), paired_end = paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1478,9 +1557,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? bam_ = if has_output_of_align + File? bam_ = ( + if has_output_of_align then bams[i] else align.bam + ) Boolean has_input_of_filter = has_output_of_align || defined(align.bam) Boolean has_output_of_filter = i < length(nodup_bams) @@ -1506,9 +1587,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? nodup_bam_ = if has_output_of_filter + File? 
nodup_bam_ = ( + if has_output_of_filter then nodup_bams[i] else filter.nodup_bam + ) Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam) Boolean has_output_of_bam2ta = i < length(tas) @@ -1526,9 +1609,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ta_ = if has_output_of_bam2ta + File? ta_ = ( + if has_output_of_bam2ta then tas[i] else bam2ta.ta + ) Boolean has_input_of_spr = has_output_of_bam2ta || defined(bam2ta.ta) if (has_input_of_spr && !align_only_ && !true_rep_only) { @@ -1574,11 +1659,13 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" + idx_tar = ( + if aligner == "bwa" then bwa_idx_tar_ - else if aligner == "bowtie2" then bowtie2_idx_tar_ + else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar - , + ), paired_end = false, use_bwa_mem_for_pe = false, bwa_mem_read_len_limit = 0, @@ -1663,13 +1750,18 @@ workflow chip { # if not starting from fastqs, keep using old method # (mapping with both ends for tag-aligns to be used for xcor) # subsample tagalign (non-mito) and cross-correlation analysis - File? ta_xcor = if defined(bam2ta_no_dedup_R1.ta) + File? ta_xcor = ( + if defined(bam2ta_no_dedup_R1.ta) then bam2ta_no_dedup_R1.ta - else if defined(bam2ta_no_dedup.ta) then bam2ta_no_dedup.ta + else if defined(bam2ta_no_dedup.ta) + then bam2ta_no_dedup.ta else ta_ - Boolean paired_end_xcor = if defined(bam2ta_no_dedup_R1.ta) + ) + Boolean paired_end_xcor = ( + if defined(bam2ta_no_dedup_R1.ta) then false else paired_end_ + ) Boolean has_input_of_xcor = defined(ta_xcor) if (has_input_of_xcor && enable_xcor_) { @@ -1691,31 +1783,36 @@ workflow chip { # before peak calling, get fragment length from xcor analysis or given input # if fraglen [] is defined in the input JSON, fraglen from xcor will be ignored - Int? fraglen_ = if i < length(fraglen) + Int? fraglen_ = ( + if i < length(fraglen) then fraglen[i] else xcor.fraglen + ) } # align each control scatter (i in range(num_ctl)) { # to override endedness definition for individual control # ctl_paired_end will override ctl_paired_ends[i] - Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i < length(ctl_paired_ends) + Boolean ctl_paired_end_ = ( + if !defined(ctl_paired_end) && i < length(ctl_paired_ends) then ctl_paired_ends[i] else select_first([ ctl_paired_end, paired_end, ]) + ) Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i]) > 0 Boolean has_output_of_align_ctl = i < length(ctl_bams) if (has_input_of_align_ctl && !has_output_of_align_ctl) { call align as align_ctl { input: fastqs_R1 = ctl_fastqs_R1[i], - fastqs_R2 = if ctl_paired_end_ + fastqs_R2 = ( + if ctl_paired_end_ then ctl_fastqs_R2[i] else [] - , + ), crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1723,11 +1820,13 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = if aligner == "bwa" + idx_tar = ( + if aligner == "bwa" then bwa_idx_tar_ - else if aligner == "bowtie2" then bowtie2_idx_tar_ + else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar - , + ), paired_end = ctl_paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1742,9 +1841,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? 
ctl_bam_ = if has_output_of_align_ctl + File? ctl_bam_ = ( + if has_output_of_align_ctl then ctl_bams[i] else align_ctl.bam + ) Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam) Boolean has_output_of_filter_ctl = i < length(ctl_nodup_bams) @@ -1770,9 +1871,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_nodup_bam_ = if has_output_of_filter_ctl + File? ctl_nodup_bam_ = ( + if has_output_of_filter_ctl then ctl_nodup_bams[i] else filter_ctl.nodup_bam + ) Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam) Boolean has_output_of_bam2ta_ctl = i < length(ctl_tas) @@ -1790,9 +1893,11 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_ta_ = if has_output_of_bam2ta_ctl + File? ctl_ta_ = ( + if has_output_of_bam2ta_ctl then ctl_tas[i] else bam2ta_ctl.ta + ) } # if there are TAs for ALL replicates then pool them @@ -1890,27 +1995,35 @@ workflow chip { # >=0: control TA index (this means that control TA with this index exists) # -1: use pooled control # -2: there is no control - Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ + Int chosen_ctl_ta_id = ( + if has_all_input_of_choose_ctl && !align_only_ then select_first([ choose_ctl.chosen_ctl_ta_ids, ])[i] else -2 - Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ + ) + Int chosen_ctl_ta_subsample = ( + if has_all_input_of_choose_ctl && !align_only_ then select_first([ choose_ctl.chosen_ctl_ta_subsample, ])[i] else 0 - Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 + ) + Boolean chosen_ctl_paired_end = ( + if chosen_ctl_ta_id == -2 then false - else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] + else if chosen_ctl_ta_id == -1 + then ctl_paired_end_[0] else ctl_paired_end_[chosen_ctl_ta_id] + ) if (chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0) { call subsample_ctl { input: - ta = if chosen_ctl_ta_id == -1 + ta = ( + if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled else ctl_ta_[chosen_ctl_ta_id] - , + ), subsample = chosen_ctl_ta_subsample, paired_end = chosen_ctl_paired_end, mem_factor = subsample_ctl_mem_factor, @@ -1918,14 +2031,17 @@ workflow chip { runtime_environment = runtime_environment, } } - Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 + Array[File] chosen_ctl_tas = ( + if chosen_ctl_ta_id <= -2 then [] - else if chosen_ctl_ta_subsample > 0 then [ + else if chosen_ctl_ta_subsample > 0 + then [ select_first([ subsample_ctl.ta_subsampled, ]), ] - else if chosen_ctl_ta_id == -1 then [ + else if chosen_ctl_ta_id == -1 + then [ select_first([ pool_ta_ctl.ta_pooled, ]), @@ -1935,12 +2051,15 @@ workflow chip { ctl_ta_[chosen_ctl_ta_id], ]), ] + ) } - Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ + Int chosen_ctl_ta_pooled_subsample = ( + if has_all_input_of_choose_ctl && !align_only_ then select_first([ choose_ctl.chosen_ctl_ta_subsample_pooled, ]) else 0 + ) # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) Array[Int] fraglen_tmp = select_all(fraglen_) @@ -1972,16 +2091,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 
else runtime_environment - , + ), } } - File? peak_ = if has_output_of_call_peak + File? peak_ = ( + if has_output_of_call_peak then peaks[i] else call_peak.peak + ) # signal track if (has_input_of_call_peak && !align_only_) { @@ -2030,16 +2153,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 else runtime_environment - , + ), } } - File? peak_pr1_ = if has_output_of_call_peak_pr1 + File? peak_pr1_ = ( + if has_output_of_call_peak_pr1 then peaks_pr1[i] else call_peak_pr1.peak + ) # call peaks on 2nd pseudo replicated tagalign Boolean has_input_of_call_peak_pr2 = defined(spr.ta_pr2[i]) @@ -2067,16 +2194,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 else runtime_environment - , + ), } } - File? peak_pr2_ = if has_output_of_call_peak_pr2 + File? peak_pr2_ = ( + if has_output_of_call_peak_pr2 then peaks_pr2[i] else call_peak_pr2.peak + ) } # if ( !align_only_ && num_rep > 1 ) { @@ -2091,10 +2222,11 @@ workflow chip { if (has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0) { call subsample_ctl as subsample_ctl_pooled { input: - ta = if num_ctl < 2 + ta = ( + if num_ctl < 2 then ctl_ta_[0] else pool_ta_ctl.ta_pooled - , + ), subsample = chosen_ctl_ta_pooled_subsample, paired_end = ctl_paired_end_[0], mem_factor = subsample_ctl_mem_factor, @@ -2103,17 +2235,21 @@ workflow chip { } } # actually not an array - Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ + Array[File?] chosen_ctl_ta_pooled = ( + if !has_all_input_of_choose_ctl || align_only_ then [] - else if chosen_ctl_ta_pooled_subsample > 0 then [ + else if chosen_ctl_ta_pooled_subsample > 0 + then [ subsample_ctl_pooled.ta_subsampled, ] - else if num_ctl < 2 then [ + else if num_ctl < 2 + then [ ctl_ta_[0], ] else [ pool_ta_ctl.ta_pooled, ] + ) Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) Boolean has_output_of_call_peak_pooled = defined(peak_pooled) @@ -2142,16 +2278,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 else runtime_environment - , + ), } } - File? peak_pooled_ = if has_output_of_call_peak_pooled + File? 
peak_pooled_ = ( + if has_output_of_call_peak_pooled then peak_pooled else call_peak_pooled.peak + ) # macs2 signal track for pooled rep if (has_input_of_call_peak_pooled && !align_only_ && num_rep > 1) { @@ -2200,16 +2340,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 else runtime_environment - , + ), } } - File? peak_ppr1_ = if has_output_of_call_peak_ppr1 + File? peak_ppr1_ = ( + if has_output_of_call_peak_ppr1 then peak_ppr1 else call_peak_ppr1.peak + ) Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) @@ -2237,16 +2381,20 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = if peak_caller_ == "spp" + runtime_environment = ( + if peak_caller_ == "spp" then runtime_environment_spp - else if peak_caller_ == "macs2" then runtime_environment_macs2 + else if peak_caller_ == "macs2" + then runtime_environment_macs2 else runtime_environment - , + ), } } - File? peak_ppr2_ = if has_output_of_call_peak_ppr2 + File? peak_ppr2_ = ( + if has_output_of_call_peak_ppr2 then peak_ppr2 else call_peak_ppr2.peak + ) # do IDR/overlap on all pairs of two replicates (i,j) # where i and j are zero-based indices and 0 <= i < j < num_rep @@ -2373,12 +2521,13 @@ workflow chip { call reproducibility as reproducibility_overlap { input: prefix = "overlap", peaks = select_all(overlap.bfilt_overlap_peak), - peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) + peaks_pr = ( + if defined(overlap_pr.bfilt_overlap_peak) then select_first([ overlap_pr.bfilt_overlap_peak, ]) else [] - , + ), peak_ppr = overlap_ppr.bfilt_overlap_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2391,12 +2540,13 @@ workflow chip { call reproducibility as reproducibility_idr { input: prefix = "idr", peaks = select_all(idr.bfilt_idr_peak), - peaks_pr = if defined(idr_pr.bfilt_idr_peak) + peaks_pr = ( + if defined(idr_pr.bfilt_idr_peak) then select_first([ idr_pr.bfilt_idr_peak, ]) else [] - , + ), peak_ppr = idr_ppr.bfilt_idr_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2435,12 +2585,13 @@ workflow chip { ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), jsd_plot = jsd.plot, - jsd_qcs = if defined(jsd.jsd_qcs) + jsd_qcs = ( + if defined(jsd.jsd_qcs) then select_first([ jsd.jsd_qcs, ]) else [] - , + ), frip_qcs = select_all(call_peak.frip_qc), frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), @@ -2450,28 +2601,31 @@ workflow chip { frip_qc_ppr2 = call_peak_ppr2.frip_qc, idr_plots = select_all(idr.idr_plot), - idr_plots_pr = if defined(idr_pr.idr_plot) + idr_plots_pr = ( + if defined(idr_pr.idr_plot) then select_first([ idr_pr.idr_plot, ]) else [] - , + ), idr_plot_ppr = idr_ppr.idr_plot, frip_idr_qcs = select_all(idr.frip_qc), - frip_idr_qcs_pr = if defined(idr_pr.frip_qc) + frip_idr_qcs_pr = ( + if defined(idr_pr.frip_qc) then select_first([ idr_pr.frip_qc, ]) else [] - , + ), frip_idr_qc_ppr = idr_ppr.frip_qc, frip_overlap_qcs = select_all(overlap.frip_qc), - frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) + frip_overlap_qcs_pr = ( + if defined(overlap_pr.frip_qc) then select_first([ overlap_pr.frip_qc, ]) else 
[] - , + ), frip_overlap_qc_ppr = overlap_ppr.frip_qc, idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, @@ -2532,7 +2686,8 @@ task align { Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) Float trimmomatic_java_heap_factor = 0.9 - Array[Array[File]] tmp_fastqs = if paired_end + Array[Array[File]] tmp_fastqs = ( + if paired_end then transpose([ fastqs_R1, fastqs_R2, @@ -2540,6 +2695,7 @@ task align { else transpose([ fastqs_R1, ]) + ) command <<< set -e @@ -2552,10 +2708,11 @@ task align { fi python3 $(which encode_task_merge_fastq.py) \ ~{write_tsv(tmp_fastqs)} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ ~{"--nth " + cpu} if [ -z '~{trim_bp}' ]; then @@ -2579,26 +2736,30 @@ task align { NEW_SUFFIX="$SUFFIX"_cropped python3 $(which encode_task_trimmomatic.py) \ --fastq1 R1$SUFFIX/*.fastq.gz \ - ~{if paired_end + ~{( + if paired_end then "--fastq2 R2$SUFFIX/*.fastq.gz" else "" - } \ - ~{if paired_end + )} \ + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ --crop-length ~{crop_length} \ --crop-length-tol "~{crop_length_tol}" \ ~{"--phred-score-format " + trimmomatic_phred_score_format} \ --out-dir-R1 R1$NEW_SUFFIX \ - ~{if paired_end + ~{( + if paired_end then "--out-dir-R2 R2$NEW_SUFFIX" else "" - } \ - ~{"--trimmomatic-java-heap " + if defined(trimmomatic_java_heap) + )} \ + ~{"--trimmomatic-java-heap " + ( + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + "G") - } \ + )} \ ~{"--nth " + cpu} SUFFIX=$NEW_SUFFIX fi @@ -2607,18 +2768,21 @@ task align { python3 $(which encode_task_bwa.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end + ~{( + if paired_end then "R2$SUFFIX/*.fastq.gz" else "" - } \ - ~{if paired_end + )} \ + ~{( + if paired_end then "--paired-end" else "" - } \ - ~{if use_bwa_mem_for_pe + )} \ + ~{( + if use_bwa_mem_for_pe then "--use-bwa-mem-for-pe" else "" - } \ + )} \ ~{"--bwa-mem-read-len-limit " + bwa_mem_read_len_limit} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} @@ -2627,33 +2791,38 @@ task align { python3 $(which encode_task_bowtie2.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end + ~{( + if paired_end then "R2$SUFFIX/*.fastq.gz" else "" - } \ + )} \ ~{"--multimapping " + multimapping} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ - ~{if use_bowtie2_local_mode + )} \ + ~{( + if use_bowtie2_local_mode then "--local" else "" - } \ + )} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} else python3 ~{custom_align_py} \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{if paired_end + ~{( + if paired_end then "R2$SUFFIX/*.fastq.gz" else "" - } \ - ~{if paired_end + )} \ + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} fi @@ -2716,26 +2885,29 @@ task filter { set -e python3 $(which encode_task_filter.py) \ ~{bam} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ --multimapping 0 \ ~{"--dup-marker " + dup_marker} \ ~{"--mapq-thresh " + mapq_thresh} \ --filter-chrs ~{sep=" " filter_chrs} \ ~{"--chrsz " + chrsz} \ - ~{if no_dup_removal + ~{( + if no_dup_removal then "--no-dup-removal" else "" - } \ + )} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} \ - ~{"--picard-java-heap " + if defined(picard_java_heap) + ~{"--picard-java-heap " + ( + if 
defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G") - } + )} if [ '~{redact_nodup_bam}' == 'true' ]; then python3 $(which encode_task_bam_to_pbam.py) \ @@ -2788,10 +2960,11 @@ task bam2ta { python3 $(which encode_task_bam2ta.py) \ ~{bam} \ --disable-tn5-shift \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--mem-gb " + samtools_mem_gb} \ @@ -2832,10 +3005,11 @@ task spr { python3 $(which encode_task_spr.py) \ ~{ta} \ ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } + )} >>> output { @@ -2911,10 +3085,11 @@ task xcor { set -e python3 $(which encode_task_xcor.py) \ ~{ta} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--chip-seq-type " + chip_seq_type} \ @@ -2964,10 +3139,11 @@ task jsd { set -e python3 $(which encode_task_jsd.py) \ ~{sep=" " select_all(nodup_bams)} \ - ~{if length(ctl_bams) > 0 + ~{( + if length(ctl_bams) > 0 then "--ctl-bam " + select_first(ctl_bams) else "" - } \ + )} \ ~{"--mapq-thresh " + mapq_thresh} \ ~{"--blacklist " + blacklist} \ ~{"--nth " + cpu} @@ -3010,10 +3186,11 @@ task choose_ctl { --ctl-tas ~{sep=" " select_all(ctl_tas)} \ ~{"--ta-pooled " + ta_pooled} \ ~{"--ctl-ta-pooled " + ctl_ta_pooled} \ - ~{if always_use_pooled_ctl + ~{( + if always_use_pooled_ctl then "--always-use-pooled-ctl" else "" - } \ + )} \ ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ ~{"--ctl-depth-limit " + ctl_depth_limit} \ ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} @@ -3090,10 +3267,11 @@ task subsample_ctl { python3 $(which encode_task_subsample_ctl.py) \ ~{ta} \ ~{"--subsample " + subsample} \ - ~{if paired_end + ~{( + if paired_end then "--paired-end" else "" - } \ + )} \ >>> output { @@ -3184,9 +3362,11 @@ task call_peak { } runtime { - cpu: if peak_caller == "macs2" + cpu: ( + if peak_caller == "macs2" then 2 else cpu + ) memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" @@ -3263,10 +3443,11 @@ task idr { command <<< set -e - ~{if defined(ta) + ~{( + if defined(ta) then "" else "touch null.frip.qc" - } + )} touch null python3 $(which encode_task_idr.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3291,9 +3472,11 @@ task idr { File idr_plot = glob("*.txt.png")[0] File idr_unthresholded_peak = glob("*.txt.gz")[0] File idr_log = glob("*.idr*.log")[0] - File frip_qc = if defined(ta) + File frip_qc = ( + if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + ) } runtime { @@ -3325,10 +3508,11 @@ task overlap { command <<< set -e - ~{if defined(ta) + ~{( + if defined(ta) then "" else "touch null.frip.qc" - } + )} touch null python3 $(which encode_task_overlap.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3349,9 +3533,11 @@ task overlap { File bfilt_overlap_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] File bfilt_overlap_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." 
+ peak_type + ".hammock.gz*")[1] - File frip_qc = if defined(ta) + File frip_qc = ( + if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] + ) } runtime { @@ -3437,10 +3623,11 @@ task gc_bias { python3 $(which encode_task_gc_bias.py) \ ~{"--nodup-bam " + nodup_bam} \ ~{"--ref-fa " + ref_fa} \ - ~{"--picard-java-heap " + if defined(picard_java_heap) + ~{"--picard-java-heap " + ( + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + "G") - } + )} >>> output { @@ -3536,10 +3723,11 @@ task qc_report { --ctl-paired-ends ~{sep=" " ctl_paired_ends} \ --pipeline-type ~{pipeline_type} \ --aligner ~{aligner} \ - ~{if (no_dup_removal) + ~{( + if (no_dup_removal) then "--no-dup-removal " else "" - } \ + )} \ --peak-caller ~{peak_caller} \ ~{"--cap-num-peak " + cap_num_peak} \ --idr-thresh ~{idr_thresh} \ @@ -3636,33 +3824,51 @@ task read_genome_tsv { output { String? genome_name = read_string("genome_name") - String? ref_fa = if size("ref_fa") == 0 + String? ref_fa = ( + if size("ref_fa") == 0 then null_s else read_string("ref_fa") - String? bwa_idx_tar = if size("bwa_idx_tar") == 0 + ) + String? bwa_idx_tar = ( + if size("bwa_idx_tar") == 0 then null_s else read_string("bwa_idx_tar") - String? bowtie2_idx_tar = if size("bowtie2_idx_tar") == 0 + ) + String? bowtie2_idx_tar = ( + if size("bowtie2_idx_tar") == 0 then null_s else read_string("bowtie2_idx_tar") - String? chrsz = if size("chrsz") == 0 + ) + String? chrsz = ( + if size("chrsz") == 0 then null_s else read_string("chrsz") - String? gensz = if size("gensz") == 0 + ) + String? gensz = ( + if size("gensz") == 0 then null_s else read_string("gensz") - String? blacklist = if size("blacklist") == 0 + ) + String? blacklist = ( + if size("blacklist") == 0 then null_s else read_string("blacklist") - String? blacklist2 = if size("blacklist2") == 0 + ) + String? blacklist2 = ( + if size("blacklist2") == 0 then null_s else read_string("blacklist2") - String? mito_chr_name = if size("mito_chr_name") == 0 + ) + String? mito_chr_name = ( + if size("mito_chr_name") == 0 then null_s else read_string("mito_chr_name") - String? regex_bfilt_peak_chr_name = if size("regex_bfilt_peak_chr_name") == 0 + ) + String? 
regex_bfilt_peak_chr_name = ( + if size("regex_bfilt_peak_chr_name") == 0 then "chr[\\dXY]+" else read_string("regex_bfilt_peak_chr_name") + ) } runtime { diff --git a/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl new file mode 100644 index 000000000..2042a3f76 --- /dev/null +++ b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl @@ -0,0 +1,25 @@ +## This is a test WDL file for if-then-else expressions +version 1.0 + +workflow if_then_else_exprs { + input { + Int a + Int b + } + + Int c = ( + if (a < b) + then a + else b + ) + + Int d = ( + if (a < b) + then a + else b + ) + + output { + Int result = c + } +} diff --git a/wdl-format/tests/format/if_then_else_exprs/source.wdl b/wdl-format/tests/format/if_then_else_exprs/source.wdl new file mode 100644 index 000000000..322bf08c7 --- /dev/null +++ b/wdl-format/tests/format/if_then_else_exprs/source.wdl @@ -0,0 +1,23 @@ +## This is a test WDL file for if-then-else expressions +version 1.0 +workflow if_then_else_exprs { + input { + Int a + Int b + } + + Int c = ( + if (a < b) + then a + else b + ) + + Int d = + if (a < b) + then a + else b + + output { + Int result = c + } +} diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl index 15f75929a..812ee0761 100644 --- a/wdl-format/tests/format/seaseq-case/source.formatted.wdl +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -280,37 +280,44 @@ workflow seaseq { ### ------------------------------------------------- ### # if multiple fastqfiles are provided - Boolean multi_fastq = if length(original_fastqfiles) > 1 + Boolean multi_fastq = ( + if length(original_fastqfiles) > 1 then true else false - Boolean one_fastq = if length(original_fastqfiles) == 1 + ) + Boolean one_fastq = ( + if length(original_fastqfiles) == 1 then true else false + ) if (defined(spikein_bowtie_index) || defined(spikein_reference)) { scatter (eachfastq in original_fastqfiles) { call fastqc.fastqc as spikein_indv_fastqc { input: inputfile = eachfastq, - default_location = if (one_fastq) + default_location = ( + if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" - , + ), } call util.basicfastqstats as spikein_indv_bfs { input: fastqfile = eachfastq, - default_location = if (one_fastq) + default_location = ( + if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - , + ), } call bowtie.spikein_SE as spikein_indv_map { input: fastqfile = eachfastq, index_files = actual_spikein_bowtie_index, metricsfile = spikein_indv_bfs.metrics_out, - default_location = if (one_fastq) + default_location = ( + if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - , + ), } } @@ -403,14 +410,16 @@ workflow seaseq { call samtools.mergebam { input: bamfiles = indv_mapping.sorted_bam, metricsfiles = indv_bfs.metrics_out, - default_location = if defined(results_name) + default_location = ( + if defined(results_name) then results_name + "/BAM_files" else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files" - , - outputfile = if defined(results_name) + ), + 
outputfile = ( + if defined(results_name) then results_name + ".sorted.bam" else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam" - , + ), } call fastqc.fastqc as mergebamfqc { input: From f2b54535383944e547fc39a9ba35a08bd499fc3b Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 11 Oct 2024 20:37:31 -0400 Subject: [PATCH 30/60] Update expr.rs --- wdl-format/src/v1/expr.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index 42042f9df..88a881600 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -670,9 +670,10 @@ pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream Date: Sat, 12 Oct 2024 08:28:17 -0400 Subject: [PATCH 31/60] revise: don't line split empty meta items --- wdl-format/src/v1/meta.rs | 20 +++++++++++++++---- .../source.formatted.wdl | 6 ++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/wdl-format/src/v1/meta.rs b/wdl-format/src/v1/meta.rs index e6fa8daa4..ef1b12cbb 100644 --- a/wdl-format/src/v1/meta.rs +++ b/wdl-format/src/v1/meta.rs @@ -22,7 +22,6 @@ pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream

>();
+    let empty = items.is_empty();
+    if !empty {
+        stream.increment_indent();
+    }
+
     let mut commas = commas.iter();
     for item in items {
         (&item).write(stream);
@@ -51,7 +55,9 @@ pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream

>(); + let empty = items.is_empty(); + if !empty { + stream.increment_indent(); + } + let mut commas = commas.iter(); for item in items { (&item).write(stream); @@ -92,7 +102,9 @@ pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream< stream.end_line(); } - stream.decrement_indent(); + if !empty { + stream.decrement_indent(); + } (&close_brace.expect("metadata object close brace")).write(stream); } diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl index 19d6e391c..5cc8e7923 100644 --- a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -30,13 +30,11 @@ workflow test_wf { } k: [ { - a: { - }, + a: {}, b: 0, c: "", d: "", - e: [ - ], + e: [], }, { x: [ From 9911f52ed801fff6c60dfbbdc6d7b78fb01710a9 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Sat, 12 Oct 2024 12:38:51 -0400 Subject: [PATCH 32/60] feat: AssertConsumedIter --- wdl-format/src/element.rs | 65 ++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index 695762a5d..d6410fa1d 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -1,14 +1,46 @@ //! Elements used during formatting. -use std::collections::HashMap; +use std::iter::Peekable; use nonempty::NonEmpty; use wdl_ast::Element; use wdl_ast::Node; -use wdl_ast::SyntaxKind; pub mod node; +/// An iterator that asserts that all items have been consumed when dropped. +pub struct AssertConsumedIter(Peekable); + +impl AssertConsumedIter +where + I: Iterator, +{ + /// Creates a new [`AssertConsumedIter`]. + pub fn new(iter: I) -> Self { + Self(iter.peekable()) + } +} + +impl Iterator for AssertConsumedIter +where + I: Iterator, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl Drop for AssertConsumedIter +where + I: Iterator, +{ + fn drop(&mut self) { + assert!(self.0.peek().is_none(), "not all iterator items consumed!"); + } +} + /// A formattable element. #[derive(Clone, Debug)] pub struct FormatElement { @@ -31,35 +63,10 @@ impl FormatElement { } /// Gets the children for this node. - pub fn children(&self) -> Option> { + pub fn children(&self) -> Option>> { self.children .as_ref() - .map(|children| children.into_iter().map(|child| &**child)) - } - - /// Collects all of the children into a hashmap based on their - /// [`SyntaxKind`]. This is often useful when formatting if you want to, - /// say, iterate through all children of a certain kind. - /// - /// # Notes - /// - /// * This clones the underlying children. It's meant to be a cheap clone, - /// but you should be aware of the (relatively small) performance hit. - pub fn children_by_kind(&self) -> HashMap> { - let mut results = HashMap::new(); - - if let Some(children) = self.children() { - for child in children { - results - .entry(child.element().kind()) - .or_insert(Vec::new()) - // NOTE: this clone is very cheap, as the underlying - // elements are mostly reference counts. 
- .push(child.to_owned()) - } - } - - results + .map(|children| AssertConsumedIter::new(children.iter().map(|c| c.as_ref()))) } } From 83b4757753c549183489958fa421e2e57b2a596e Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Sat, 12 Oct 2024 13:19:02 -0400 Subject: [PATCH 33/60] tests: test for unconsumed children --- wdl-format/src/element.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index d6410fa1d..623a3fb4f 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -156,9 +156,7 @@ workflow bar # This is an inline comment on the workflow ident. let format_element = Node::Ast(document).into_format_element(); let mut children = format_element.children().unwrap(); - //////////////////////////////////////////////////////////////////////////////// // Version statement - //////////////////////////////////////////////////////////////////////////////// let version = children.next().expect("version statement element"); assert_eq!( @@ -176,9 +174,7 @@ workflow bar # This is an inline comment on the workflow ident. SyntaxKind::Version ); - //////////////////////////////////////////////////////////////////////////////// // Task Definition - //////////////////////////////////////////////////////////////////////////////// let task = children.next().expect("task element"); assert_eq!( @@ -208,9 +204,7 @@ workflow bar # This is an inline comment on the workflow ident. assert!(task_children.next().is_none()); - //////////////////////////////////////////////////////////////////////////////// // Workflow Definition - //////////////////////////////////////////////////////////////////////////////// let workflow = children.next().expect("workflow element"); assert_eq!( @@ -245,4 +239,28 @@ workflow bar # This is an inline comment on the workflow ident. assert!(workflow_children.next().is_none()); } + + #[test] + #[should_panic] + fn unconsumed_children_panic() { + let (document, diagnostics) = Document::parse( + "## WDL +version 1.2 # This is a comment attached to the version. + +# This is a comment attached to the task keyword. +task foo # This is an inline comment on the task ident. +{ + +} # This is an inline comment on the task close brace.", + ); + + assert!(diagnostics.is_empty()); + let document = document.ast().into_v1().unwrap(); + + let format_element = Node::Ast(document).into_format_element(); + fn inner(format_element: &crate::element::FormatElement) { + let mut _children = format_element.children().unwrap(); + } + inner(&format_element); + } } From 9f46bd46da36381a614ed4c4bceb9daae755a565 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 09:27:16 -0400 Subject: [PATCH 34/60] chore: cleanup --- wdl-format/src/element.rs | 2 + wdl-format/src/token.rs | 3 +- wdl-format/src/token/post.rs | 2 +- wdl-format/src/v1.rs | 23 +++--- wdl-format/src/v1/expr.rs | 72 ++++++++++--------- wdl-format/src/v1/meta.rs | 111 +++++++++++++++++------------ wdl-format/src/v1/struct.rs | 39 +++++----- wdl-format/src/v1/task.rs | 40 ++++++----- wdl-format/src/v1/workflow/call.rs | 3 +- 9 files changed, 163 insertions(+), 132 deletions(-) diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs index 623a3fb4f..1a8545367 100644 --- a/wdl-format/src/element.rs +++ b/wdl-format/src/element.rs @@ -91,6 +91,8 @@ impl AstElementFormatExt for Element { } /// Collates the children of a particular node. +/// +/// This function ignores trivia. 
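+///
+/// As a sketch of the intent (example input, not taken from the test suite):
+/// for the source `task foo {}`, the collated children are the `task`
+/// keyword, the ident `foo`, and the brace tokens, while the whitespace and
+/// comment trivia between them is skipped.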
fn collate(node: &Node) -> Option>> { let mut results = Vec::new(); let stream = node.syntax().children_with_tokens().filter_map(|syntax| { diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs index a5ec1b649..237065763 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -71,8 +71,7 @@ impl IntoIterator for TokenStream { /// The kind of comment. #[derive(Debug, Eq, PartialEq)] pub enum Comment { - /// A comment on it's own line, indented to the same level as the code - /// following it. + /// A comment on it's own line. Preceding(String), /// A comment on the same line as the code preceding it. diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index fac2d6aeb..d5923bf95 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -46,7 +46,7 @@ impl std::fmt::Display for PostToken { match self { PostToken::Space => write!(f, "{SPACE}"), PostToken::Newline => write!(f, "{NEWLINE}"), - PostToken::Indent => write!(f, " "), // 4 spaces TODO replace + PostToken::Indent => write!(f, " "), // TODO(af): Make this configurable. PostToken::Literal(value) => write!(f, "{value}"), } } diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs index a66c15f6f..a211a620d 100644 --- a/wdl-format/src/v1.rs +++ b/wdl-format/src/v1.rs @@ -90,21 +90,16 @@ pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream>(); + + for child in children { + match child.element().kind() { + SyntaxKind::BoundDeclNode | SyntaxKind::UnboundDeclNode => inputs.push(child), + SyntaxKind::CloseBrace => close_brace = Some(child), + _ => panic!("unexpected input section child"), + } + } // TODO: sort inputs for input in inputs { diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index 88a881600..1285c5f8a 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -273,21 +273,23 @@ pub fn format_literal_array(element: &FormatElement, stream: &mut TokenStream { close_bracket = Some(child.to_owned()); - false - } else if child.element().kind() == SyntaxKind::Comma { + } + SyntaxKind::Comma => { commas.push(child.to_owned()); - false - } else { - true } - }) - .collect::>(); + _ => { + items.push(child.to_owned()); + } + } + } let empty = items.is_empty(); if !empty { @@ -336,21 +338,23 @@ pub fn format_literal_map(element: &FormatElement, stream: &mut TokenStream { close_brace = Some(child.to_owned()); - false - } else if child.element().kind() == SyntaxKind::Comma { + } + SyntaxKind::Comma => { commas.push(child.to_owned()); - false - } else { - true } - }) - .collect::>(); + _ => { + items.push(child.to_owned()); + } + } + } let mut commas = commas.iter(); for item in items { @@ -394,21 +398,23 @@ pub fn format_literal_object(element: &FormatElement, stream: &mut TokenStream

{ close_brace = Some(child.to_owned()); - false - } else if child.element().kind() == SyntaxKind::Comma { + } + SyntaxKind::Comma => { commas.push(child.to_owned()); - false - } else { - true } - }) - .collect::>(); + _ => { + members.push(child.to_owned()); + } + } + } let mut commas = commas.iter(); for member in members { diff --git a/wdl-format/src/v1/meta.rs b/wdl-format/src/v1/meta.rs index ef1b12cbb..c7966bf54 100644 --- a/wdl-format/src/v1/meta.rs +++ b/wdl-format/src/v1/meta.rs @@ -23,21 +23,23 @@ pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream

{ + commas.push(child.clone()); } - }) - .collect::>(); + SyntaxKind::CloseBracket => { + close_bracket = Some(child.clone()); + } + _ => { + items.push(child.clone()); + } + } + } let empty = items.is_empty(); if !empty { @@ -69,22 +71,27 @@ pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream< assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); (&open_brace).write(stream); - let mut close_brace = None; + let mut items = Vec::new(); let mut commas = Vec::new(); - let items = children - .filter(|child| { - if child.element().kind() == SyntaxKind::MetadataObjectItemNode { - true - } else if child.element().kind() == SyntaxKind::Comma { - commas.push(child.to_owned()); - false - } else { - assert!(child.element().kind() == SyntaxKind::CloseBrace); - close_brace = Some(child.to_owned()); - false + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::MetadataObjectItemNode => { + items.push(child.clone()); + } + SyntaxKind::Comma => { + commas.push(child.clone()); } - }) - .collect::>(); + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => unreachable!( + "unexpected metadata object child: {:?}", + child.element().kind() + ), + } + } let empty = items.is_empty(); if !empty { @@ -141,18 +148,23 @@ pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream (&open_brace).write(stream); stream.increment_indent(); + let mut items = Vec::new(); let mut close_brace = None; - let items = children - .filter(|child| { - if child.element().kind() == SyntaxKind::MetadataObjectItemNode { - true - } else { - assert!(child.element().kind() == SyntaxKind::CloseBrace); - close_brace = Some(child.to_owned()); - false + + for child in children { + match child.element().kind() { + SyntaxKind::MetadataObjectItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); } - }) - .collect::>(); + _ => unreachable!( + "unexpected metadata section child: {:?}", + child.element().kind() + ), + } + } for item in items { (&item).write(stream); @@ -183,18 +195,23 @@ pub fn format_parameter_metadata_section( (&open_brace).write(stream); stream.increment_indent(); + let mut items = Vec::new(); let mut close_brace = None; - let items = children - .filter(|child| { - if child.element().kind() == SyntaxKind::MetadataObjectItemNode { - true - } else { - assert!(child.element().kind() == SyntaxKind::CloseBrace); - close_brace = Some(child.to_owned()); - false + + for child in children { + match child.element().kind() { + SyntaxKind::MetadataObjectItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); } - }) - .collect::>(); + _ => unreachable!( + "unexpected parameter metadata section child: {:?}", + child.element().kind() + ), + } + } for item in items { (&item).write(stream); diff --git a/wdl-format/src/v1/struct.rs b/wdl-format/src/v1/struct.rs index eb2345363..0b2401025 100644 --- a/wdl-format/src/v1/struct.rs +++ b/wdl-format/src/v1/struct.rs @@ -104,36 +104,41 @@ pub fn format_literal_struct(element: &FormatElement, stream: &mut TokenStream

{ + members.push(child.clone()); } - }) - .collect::>(); + SyntaxKind::Comma => { + commas.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in literal struct: {:?}", + child.element().kind() + ); + } + } + } let mut commas = commas.iter(); for member in members { (&member).write(stream); if let Some(comma) = commas.next() { (comma).write(stream); - stream.end_line(); } else { stream.push_literal(",".to_string(), SyntaxKind::Comma); - stream.end_line(); } + stream.end_line(); } stream.decrement_indent(); diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs index c17cf6d2b..d46928e60 100644 --- a/wdl-format/src/v1/task.rs +++ b/wdl-format/src/v1/task.rs @@ -167,24 +167,30 @@ pub fn format_command_section(element: &FormatElement, stream: &mut TokenStream< // Technically there's no trivia inside the command section, // so we don't want to increment indent here. // All the indentation should be handled by the command text itself. + // TODO: multi-line placeholders need better formatting for child in children { - let kind = child.element().kind(); - if kind == SyntaxKind::CloseBrace { - stream.push_literal_in_place_of_token( - child - .element() - .as_token() - .expect("close brace should be token"), - ">>>".to_string(), - ); - } else if kind == SyntaxKind::CloseHeredoc { - (&child).write(stream); - } else { - assert!(matches!( - kind, - SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode - )); - (&child).write(stream); + match child.element().kind() { + SyntaxKind::CloseBrace => { + stream.push_literal_in_place_of_token( + child + .element() + .as_token() + .expect("close brace should be token"), + ">>>".to_string(), + ); + } + SyntaxKind::CloseHeredoc => { + (&child).write(stream); + } + SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode => { + (&child).write(stream); + } + _ => { + unreachable!( + "unexpected child in command section: {:?}", + child.element().kind() + ); + } } } stream.end_line(); diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs index ce3956972..316da7c24 100644 --- a/wdl-format/src/v1/workflow/call.rs +++ b/wdl-format/src/v1/workflow/call.rs @@ -36,7 +36,8 @@ pub fn format_call_input_item(element: &FormatElement, stream: &mut TokenStream< let name = children.next().expect("call input item name"); (&name).write(stream); - // Don't call end_word() here in case the name is alone + // Don't call end_word() here in case the name is alone in which case it should + // be followed by a comma. if let Some(equals) = children.next() { stream.end_word(); From 2cf58149b5be7df5ae82279b6bc5934f9af19ba3 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 09:35:22 -0400 Subject: [PATCH 35/60] docs: add docstrings --- wdl-format/tests/format.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/wdl-format/tests/format.rs b/wdl-format/tests/format.rs index aeaecf553..43a466cf6 100644 --- a/wdl-format/tests/format.rs +++ b/wdl-format/tests/format.rs @@ -5,9 +5,9 @@ //! Each directory is expected to contain: //! //! * `source.wdl` - the test input source to parse. -//! * `source.formatted` - the expected formatted output. +//! * `source.formatted.wdl` - the expected formatted output. //! -//! The `source.formatted` file may be automatically generated or updated by +//! The `source.formatted.wdl` file may be automatically generated or updated by //! 
setting the `BLESS` environment variable when running this test. use std::collections::HashSet; @@ -33,6 +33,7 @@ use wdl_ast::Node; use wdl_format::Formatter; use wdl_format::element::node::AstNodeFormatExt; +/// Find all the tests in the `tests/format` directory. fn find_tests() -> Vec { // Check for filter arguments consisting of test names let mut filter = HashSet::new(); @@ -60,6 +61,7 @@ fn find_tests() -> Vec { tests } +/// Format a list of diagnostics. fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String { let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source); let mut buffer = Buffer::no_color(); @@ -76,6 +78,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String::from_utf8(buffer.into_inner()).expect("should be UTF-8") } +/// Compare the result of a test to the expected result. fn compare_result(path: &Path, result: &str) -> Result<(), String> { if env::var_os("BLESS").is_some() { fs::write(path, &result).map_err(|e| { @@ -106,6 +109,7 @@ fn compare_result(path: &Path, result: &str) -> Result<(), String> { Ok(()) } +/// Run a test. fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { let path = test.join("source.wdl"); let source = std::fs::read_to_string(&path).map_err(|e| { @@ -144,6 +148,7 @@ fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { Ok(()) } +/// Run all the tests. fn main() { let tests = find_tests(); println!("\nrunning {} tests\n", tests.len()); From ba3d0fe16d760f5f493a2a5146e47f4b8313aa52 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 10:21:48 -0400 Subject: [PATCH 36/60] chore: rm wdl-config --- wdl-config/src/loader.rs | 127 --------------------------------------- 1 file changed, 127 deletions(-) delete mode 100644 wdl-config/src/loader.rs diff --git a/wdl-config/src/loader.rs b/wdl-config/src/loader.rs deleted file mode 100644 index 6d7e605e2..000000000 --- a/wdl-config/src/loader.rs +++ /dev/null @@ -1,127 +0,0 @@ -use std::collections::VecDeque; -use std::convert::Infallible; -use std::path::PathBuf; - -use config::ConfigError; -use config::Environment; -use config::File; - -use crate::providers::EnvProvider; -use crate::providers::FileProvider; -use crate::BoxedProvider; -use crate::Config; -use crate::Provider; -use crate::CONFIG_SEARCH_PATHS; - -#[derive(Debug)] -pub enum Error { - /// An error from the `config` crate. - Config(ConfigError), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::Config(err) => write!(f, "`config` error: {err}"), - } - } -} - -impl std::error::Error for Error {} - -/// A [`Result`](std::result::Result) with an [`Error`]. -pub type Result = std::result::Result; - -pub struct Loader(VecDeque); - -impl Loader { - /// Creates an empty [`Loader`]. - pub fn empty() -> Self { - Self(VecDeque::new()) - } - - /// Adds the default configuration to the front of the provider stack. - pub fn with_default_configuration(mut self) -> Self { - // NOTE: default configuration should always be the first provider evaluated. - self.0.push_front(Config::default().into()); - self - } - - /// Adds a file to the search path of the [`Loader`]. - /// - /// Note that the file is not required to be present. - pub fn add_optional_file(mut self, path: PathBuf) -> Self { - self.0.push_back(FileProvider::optional(path).into()); - self - } - - /// Adds a file to the search path of the [`Loader`]. 
- /// - /// Note that the file is required to be present. - pub fn add_required_file(mut self, path: PathBuf) -> Self { - self.0.push_back(FileProvider::required(path).into()); - self - } - - /// Adds the default search paths to the [`Loader`]. - pub fn with_default_search_paths(mut self) -> Self { - for path in CONFIG_SEARCH_PATHS.clone().into_iter() { - self = self.add_optional_file(path); - } - - self - } - - /// Adds a new environment prefix to the [`Loader`]. - pub fn add_env_prefix(mut self, prefix: &str) -> Self { - self.0.push_back(EnvProvider::new(prefix).into()); - self - } - - /// Adds the default environment prefix to the [`Loader`]. - pub fn with_default_env_prefix(mut self) -> Self { - self.0.push_back(EnvProvider::default().into()); - self - } - - /// Gets a reference to the inner [`ConfigBuilder`]. - pub fn inner(&self) -> &VecDeque { - &self.0 - } - - /// Consumes `self` and returns the inner [`ConfigBuilder`]. - pub fn into_inner(self) -> VecDeque { - self.0 - } - - /// Consumes `self` and attempts to load the [`Config`]. - pub fn try_load(self) -> std::result::Result> { - for provider in self.0 { - let config = provider.provide().map_err(|e| ); - } - - self.0 - .build() - .map_err(Error::Config)? - .try_deserialize() - .map_err(Error::Config) - } -} - -impl Default for Loader { - fn default() -> Self { - Self::empty() - .with_default_search_paths() - .with_default_env_prefix() - } -} - -#[cfg(test)] -mod tests { - use crate::Loader; - - #[test] - fn an_empty_loader_unwraps() { - Loader::empty(); - } -} From 9f80c4ae0dbd8d5eb02f75579efa88f5da66b503 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 10:22:03 -0400 Subject: [PATCH 37/60] chore: clean up tree.rs --- wdl-grammar/src/tree.rs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/wdl-grammar/src/tree.rs b/wdl-grammar/src/tree.rs index 08fd2490e..103c7bcad 100644 --- a/wdl-grammar/src/tree.rs +++ b/wdl-grammar/src/tree.rs @@ -720,7 +720,7 @@ impl fmt::Debug for SyntaxTree { /// [`SyntaxToken`]. /// /// Whitespace is only considered substantial if it contains more than one -/// newline and is between comments. Comments are always considered substantial. +/// newline. Comments are always considered substantial. fn gather_substantial_trivia( source: &SyntaxToken, direction: Direction, @@ -796,22 +796,6 @@ fn gather_substantial_trivia( }; } - // // Remove leading and trailing whitespace from results. - // while let Some(t) = results.front() { - // if t.kind() == SyntaxKind::Whitespace { - // results.pop_front(); - // } else { - // break; - // } - // } - // while let Some(t) = results.back() { - // if t.kind() == SyntaxKind::Whitespace { - // results.pop_back(); - // } else { - // break; - // } - // } - // NOTE: most of the time, this conversion will be O(1). Occassionally // it will be O(n). No allocations will ever be done. Thus, the // ammortized cost of this is quite cheap. 
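The tree.rs doc fix above pins down the rule for substantial trivia: comments are always substantial, while whitespace is substantial only when it spans more than one newline. A minimal WDL sketch of the distinction (the workflow and declarations are invented for illustration):

    version 1.2

    workflow trivia_example {
        # Always substantial: a comment is gathered regardless of spacing.
        Int a = 1


        # The whitespace above this comment spans more than one newline, so
        # it is substantial and the blank separation is kept with the trivia.
        Int b = 2
    }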
From 7b5faf85ba36f109775e5debe6fea5a285eb4547 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Mon, 14 Oct 2024 10:36:27 -0400
Subject: [PATCH 38/60] Update stdlib.rs

---
 wdl-analysis/src/stdlib.rs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/wdl-analysis/src/stdlib.rs b/wdl-analysis/src/stdlib.rs
index 63ffadf0d..3ec266acc 100644
--- a/wdl-analysis/src/stdlib.rs
+++ b/wdl-analysis/src/stdlib.rs
@@ -2445,12 +2445,10 @@ pub static STDLIB: LazyLock<StandardLibrary> = LazyLock::new(|| {
     functions
         .insert(
             "select_first",
-            PolymorphicFunction::new(SupportedVersion::V1(V1::Zero), vec![
-                FunctionSignature::builder()
-                    .type_parameter("X", OptionalTypeConstraint)
-                    .parameter(GenericArrayType::non_empty(GenericType::Parameter("X")))
-                    .ret(GenericType::UnqualifiedParameter("X"))
-                    .build(),
+            // This differs from the definition of `select_first` in that we can have a single
+            // signature of `X select_first(Array[X?], [X])`.
+            MonomorphicFunction::new(
+                SupportedVersion::V1(V1::Zero),
                 FunctionSignature::builder()
                     .type_parameter("X", OptionalTypeConstraint)
                     .required(1)
@@ -2458,7 +2456,7 @@
                     .parameter(GenericType::UnqualifiedParameter("X"))
                     .ret(GenericType::UnqualifiedParameter("X"))
                     .build(),
-            ])
+            )
             .into(),
         )
         .is_none()

From fe317bee3315346b9873efe9b7ba4b2df0892af5 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Mon, 14 Oct 2024 10:49:48 -0400
Subject: [PATCH 39/60] Update Gauntlet.toml

---
 Gauntlet.toml | 54 +++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/Gauntlet.toml b/Gauntlet.toml
index 99f583124..3ffb659b2 100644
--- a/Gauntlet.toml
+++ b/Gauntlet.toml
@@ -233,7 +233,7 @@ permalink = "https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/blob/1cf2b

 [[diagnostics]]
 document = "PacificBiosciences/HiFi-human-WGS-WDL:/workflows/tertiary_analysis/tertiary_analysis.wdl"
-message = "tertiary_analysis.wdl:46:38: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[IndexData]]+`"
+message = "tertiary_analysis.wdl:46:38: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[IndexData]]+`"
 permalink = "https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/blob/1cf2b2e80024290d0ec1ea93b6a279ea2de519b0/workflows/tertiary_analysis/tertiary_analysis.wdl/#L46"

 [[diagnostics]]
@@ -893,7 +893,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026

 [[diagnostics]]
 document = "biowdl/tasks:/sambamba.wdl"
-message = "sambamba.wdl:157:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`"
+message = "sambamba.wdl:157:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`"
 permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/sambamba.wdl/#L157"

 [[diagnostics]]
@@ -928,7 +928,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026

 [[diagnostics]]
 document = "biowdl/tasks:/samtools.wdl"
-message = "samtools.wdl:470:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type
or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "samtools.wdl:470:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/samtools.wdl/#L470" [[diagnostics]] @@ -968,7 +968,7 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026 [[diagnostics]] document = "biowdl/tasks:/umi.wdl" -message = "umi.wdl:39:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "umi.wdl:39:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026452d5dbae2/umi.wdl/#L39" [[diagnostics]] @@ -1528,7 +1528,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" -message = "BenchmarkVCFs.wdl:91:54: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]`" +message = "BenchmarkVCFs.wdl:91:54: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L91" [[diagnostics]] @@ -1683,7 +1683,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/FunctionalEquivalence/subworkflows/FEEvaluation.wdl" -message = "FEEvaluation.wdl:124:35: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "FEEvaluation.wdl:124:35: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/FunctionalEquivalence/subworkflows/FEEvaluation.wdl/#L124" [[diagnostics]] @@ -1743,7 +1743,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl" -message = "Glimpse2ImputationAndCheckQC.wdl:70:39: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "Glimpse2ImputationAndCheckQC.wdl:70:39: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl/#L70" [[diagnostics]] @@ -1768,7 +1768,7 @@ permalink = 
"https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl" -message = "Glimpse2ImputationAndCheckQC.wdl:83:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "Glimpse2ImputationAndCheckQC.wdl:83:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/GlimpseImputationPipeline/Glimpse2ImputationAndCheckQC.wdl/#L83" [[diagnostics]] @@ -1983,7 +1983,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/ImputationPipeline/Validation/SubsetWeightSet.wdl" -message = "SubsetWeightSet.wdl:28:66: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[SelfExclusiveSites]+`" +message = "SubsetWeightSet.wdl:28:66: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[SelfExclusiveSites]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/ImputationPipeline/Validation/SubsetWeightSet.wdl/#L28" [[diagnostics]] @@ -2023,7 +2023,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/LongReadRNABenchmark/IsoformDiscoveryBenchmarkTasks.wdl" -message = "IsoformDiscoveryBenchmarkTasks.wdl:69:32: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "IsoformDiscoveryBenchmarkTasks.wdl:69:32: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/LongReadRNABenchmark/IsoformDiscoveryBenchmarkTasks.wdl/#L69" [[diagnostics]] @@ -2078,7 +2078,7 @@ permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a000 [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/MatchFingerprints.wdl" -message = "MatchFingerprints.wdl:67:33: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[String]]`" +message = "MatchFingerprints.wdl:67:33: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[String]]`" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/MatchFingerprints.wdl/#L67" [[diagnostics]] @@ -2443,37 +2443,37 @@ permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e2955 [[diagnostics]] document = 
"broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:91:34: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:91:34: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L91" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:92:48: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:92:48: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L92" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:93:46: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:93:46: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L93" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:94:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:94:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L94" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:95:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = 
"BroadInternalUltimaGenomics.wdl:95:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L95" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:96:38: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:96:38: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L96" [[diagnostics]] document = "broadinstitute/warp:/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl" -message = "BroadInternalUltimaGenomics.wdl:97:44: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "BroadInternalUltimaGenomics.wdl:97:44: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl/#L97" [[diagnostics]] @@ -4138,7 +4138,7 @@ permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/a04293a527117 [[diagnostics]] document = "chanzuckerberg/czid-workflows:/workflows/bulk-download/run.wdl" -message = "run.wdl:91:31: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]`" +message = "run.wdl:91:31: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]`" permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/a04293a5271176885ce7f876b6353b20da3f7b98/workflows/bulk-download/run.wdl/#L91" [[diagnostics]] @@ -4418,7 +4418,7 @@ permalink = "https://github.com/stjudecloud/workflows/blob/a56ad9b8c7de5c9b13350 [[diagnostics]] document = "stjudecloud/workflows:/workflows/chipseq/chipseq-standard.wdl" -message = "chipseq-standard.wdl:110:45: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Array[File]]+`" +message = "chipseq-standard.wdl:110:45: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Array[File]]+`" permalink = "https://github.com/stjudecloud/workflows/blob/a56ad9b8c7de5c9b13350f465c1543d0cb0b6b1a/workflows/chipseq/chipseq-standard.wdl/#L110" [[diagnostics]] @@ -4688,27 +4688,27 @@ permalink = 
"https://github.com/theiagen/public_health_bioinformatics/blob/be240 [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:33:58: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "wf_nextclade_addToRefTree.wdl:33:58: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L33" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:34:53: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:34:53: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L34" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:35:51: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:35:51: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L35" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:36:52: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[File]+`" +message = "wf_nextclade_addToRefTree.wdl:36:52: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L36" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl" -message = "wf_nextclade_addToRefTree.wdl:37:57: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[String]+`" +message = "wf_nextclade_addToRefTree.wdl:37:57: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[String]+`" permalink = 
"https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl/#L37" [[diagnostics]] @@ -4953,12 +4953,12 @@ permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be240 [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl" -message = "wf_theiaeuk_illumina_pe.wdl:118:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "wf_theiaeuk_illumina_pe.wdl:118:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl/#L118" [[diagnostics]] document = "theiagen/public_health_bioinformatics:/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl" -message = "wf_theiaeuk_illumina_pe.wdl:127:40: error: type mismatch: argument to function `select_first` expects type `Array[X]+` where `X`: any optional type or `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" +message = "wf_theiaeuk_illumina_pe.wdl:127:40: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[Int]+`" permalink = "https://github.com/theiagen/public_health_bioinformatics/blob/be24047e2b64d02a187824909b91d04bda6074d8/workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl/#L127" [[diagnostics]] From a89b3ee7456eaeb6bfc7d45c100babe2d9256bfa Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 11:51:48 -0400 Subject: [PATCH 40/60] fix: don't reference TaskHints in workflow sections --- wdl-ast/src/v1/workflow.rs | 31 ----------------------------- wdl-lint/src/rules/section_order.rs | 2 +- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/wdl-ast/src/v1/workflow.rs b/wdl-ast/src/v1/workflow.rs index 2a164a1c1..09aac56c1 100644 --- a/wdl-ast/src/v1/workflow.rs +++ b/wdl-ast/src/v1/workflow.rs @@ -21,7 +21,6 @@ use crate::WorkflowDescriptionLanguage; use crate::support::child; use crate::support::children; use crate::token; -use crate::v1::TaskHintsSection; /// Represents a workflow definition. #[derive(Clone, Debug, PartialEq, Eq)] @@ -116,8 +115,6 @@ pub enum WorkflowItem { Metadata(MetadataSection), /// The item is a parameter meta section. ParameterMetadata(ParameterMetadataSection), - /// The item is a task hints section. - TaskHints(TaskHintsSection), /// The item is a workflow hints section. WorkflowHints(WorkflowHintsSection), /// The item is a private bound declaration. 
@@ -173,9 +170,6 @@ impl WorkflowItem {
             SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata(
                 ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"),
             )),
-            SyntaxKind::TaskHintsSectionNode => Some(Self::TaskHints(
-                TaskHintsSection::cast(syntax).expect("task hints section to cast"),
-            )),
             SyntaxKind::WorkflowHintsSectionNode => Some(Self::WorkflowHints(
                 WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"),
             )),
@@ -196,7 +190,6 @@ impl WorkflowItem {
             Self::Call(element) => element.syntax(),
             Self::Metadata(element) => element.syntax(),
             Self::ParameterMetadata(element) => element.syntax(),
-            Self::TaskHints(element) => element.syntax(),
             Self::WorkflowHints(element) => element.syntax(),
             Self::Declaration(element) => element.syntax(),
         }
@@ -374,30 +367,6 @@ impl WorkflowItem {
         }
     }
 
-    /// Attempts to get a reference to the inner [`TaskHintsSection`].
-    ///
-    /// * If `self` is a [`WorkflowItem::TaskHints`], then a reference to the
-    ///   inner [`TaskHintsSection`] is returned wrapped in [`Some`].
-    /// * Else, [`None`] is returned.
-    pub fn as_task_hints_section(&self) -> Option<&TaskHintsSection> {
-        match self {
-            Self::TaskHints(task_hints_section) => Some(task_hints_section),
-            _ => None,
-        }
-    }
-
-    /// Consumes `self` and attempts to return the inner [`TaskHintsSection`].
-    ///
-    /// * If `self` is a [`WorkflowItem::TaskHints`], then the inner
-    ///   [`TaskHintsSection`] is returned wrapped in [`Some`].
-    /// * Else, [`None`] is returned.
-    pub fn into_task_hints_section(self) -> Option<TaskHintsSection> {
-        match self {
-            Self::TaskHints(task_hints_section) => Some(task_hints_section),
-            _ => None,
-        }
-    }
-
     /// Attempts to get a reference to the inner [`WorkflowHintsSection`].
     ///
     /// * If `self` is a [`WorkflowItem::WorkflowHints`], then a reference to the
diff --git a/wdl-lint/src/rules/section_order.rs b/wdl-lint/src/rules/section_order.rs
index d1e6adcb7..7070ad9e9 100644
--- a/wdl-lint/src/rules/section_order.rs
+++ b/wdl-lint/src/rules/section_order.rs
@@ -218,7 +218,7 @@ impl Visitor for SectionOrderingRule {
                     WorkflowItem::Output(_) if encountered <= State::Output => {
                         encountered = State::Output;
                     }
-                    WorkflowItem::TaskHints(_) if encountered <= State::Hints => {
+                    WorkflowItem::WorkflowHints(_) if encountered <= State::Hints => {
                         encountered = State::Hints;
                     }
                     _ => {

From 34ddab3da6bfb78b58d3375d66bdee7f00756b13 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Mon, 14 Oct 2024 11:59:17 -0400
Subject: [PATCH 41/60] docs: CHANGELOG updates

---
 wdl-ast/CHANGELOG.md     |  4 ++++
 wdl-format/CHANGELOG.md  | 12 ++++++++++++
 wdl-grammar/CHANGELOG.md |  5 +++++
 wdl/CHANGELOG.md         |  1 +
 4 files changed, 22 insertions(+)
 create mode 100644 wdl-format/CHANGELOG.md

diff --git a/wdl-ast/CHANGELOG.md b/wdl-ast/CHANGELOG.md
index e631c27b3..935e56566 100644
--- a/wdl-ast/CHANGELOG.md
+++ b/wdl-ast/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Changed
+
+* Introduce a guarantee that each CST element (node or token) has one and only one analogous AST element ([#133](https://github.com/stjude-rust-labs/wdl/pull/133))
+
 ### Fixed
 
 * Detect duplicate call inputs ([#199](https://github.com/stjude-rust-labs/wdl/pull/199)).
diff --git a/wdl-format/CHANGELOG.md b/wdl-format/CHANGELOG.md
new file mode 100644
index 000000000..5ff4dfedf
--- /dev/null
+++ b/wdl-format/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added + +* Adds the initial version of the crate. diff --git a/wdl-grammar/CHANGELOG.md b/wdl-grammar/CHANGELOG.md index 9f26fdfe1..0f442eea4 100644 --- a/wdl-grammar/CHANGELOG.md +++ b/wdl-grammar/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +* `SyntaxExt` (for better handling of siblings) and `SyntaxTokenExt` (for handling of associated trivia) traits ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)) +* `is_trivia()` and `is_symbolic()` methods for `SyntaxKind` ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)) + ### Fixed * Fixed parsing of workflow `hints` section to no longer accept expressions ([#176](https://github.com/stjude-rust-labs/wdl/pull/176)) diff --git a/wdl/CHANGELOG.md b/wdl/CHANGELOG.md index 5af004d01..6f96cb8ff 100644 --- a/wdl/CHANGELOG.md +++ b/wdl/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* Added a `format` command to the `wdl` CLI tool ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)) * Added a `verbosity` flag to the `wdl` CLI tool ([#199](https://github.com/stjude-rust-labs/wdl/pull/199)). ## 0.8.0 - 09-16-2024 From 68a6b1c0358c4ab037d11f6bd62d9a79af6b2ada Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 12:22:03 -0400 Subject: [PATCH 42/60] Delete engine.rs --- wdl-analysis/src/engine.rs | 792 ------------------------------------- 1 file changed, 792 deletions(-) delete mode 100644 wdl-analysis/src/engine.rs diff --git a/wdl-analysis/src/engine.rs b/wdl-analysis/src/engine.rs deleted file mode 100644 index 3c942b122..000000000 --- a/wdl-analysis/src/engine.rs +++ /dev/null @@ -1,792 +0,0 @@ -//! Implementation of the analysis engine. - -use std::cell::RefCell; -use std::collections::HashSet; -use std::fmt; -use std::fs; -use std::path::Path; -use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; - -use anyhow::anyhow; -use anyhow::bail; -use anyhow::Context; -use anyhow::Result; -use futures::stream::FuturesUnordered; -use futures::Future; -use futures::StreamExt; -use parking_lot::RwLock; -use petgraph::algo::has_path_connecting; -use petgraph::algo::DfsSpace; -use petgraph::graph::NodeIndex; -use petgraph::stable_graph::StableDiGraph; -use petgraph::visit::Visitable; -use petgraph::Direction; -use reqwest::Client; -use rowan::GreenNode; -use tokio::runtime::Handle; -use tokio::sync::mpsc::unbounded_channel; -use tokio::sync::mpsc::UnboundedReceiver; -use tokio::sync::mpsc::UnboundedSender; -use tokio::sync::oneshot; -use tokio::task::JoinHandle; -use url::Url; -use wdl_ast::Ast; -use wdl_ast::AstToken; -use wdl_ast::Diagnostic; -use wdl_ast::SyntaxNode; -use wdl_ast::Validator; - -use crate::rayon::RayonHandle; -use crate::Document; -use crate::DocumentGraph; -use crate::DocumentId; -use crate::DocumentScope; - -/// The minimum number of milliseconds between analysis progress reports. -const MINIMUM_PROGRESS_MILLIS: u128 = 50; - -/// Represents the kind of analysis progress being reported. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ProgressKind { - /// The progress is for parsing documents. - Parsing, - /// The progress is for analyzing documents. 
-    Analyzing,
-}
-
-impl fmt::Display for ProgressKind {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Self::Parsing => write!(f, "parsing"),
-            Self::Analyzing => write!(f, "analyzing"),
-        }
-    }
-}
-
-/// Represents analysis state.
-#[derive(Debug, Default)]
-
-pub(crate) struct State {
-    /// The document graph being built.
-    pub(crate) graph: DocumentGraph,
-    /// Represents dependency edges that, if they were added to the document
-    /// graph, would form a cycle.
-    ///
-    /// The first in the pair is the importing node and the second is the
-    /// imported node.
-    ///
-    /// This is used to break import cycles; when analyzing the document, if the
-    /// import exists in this set, a diagnostic will be added and the import
-    /// otherwise ignored.
-    pub(crate) cycles: HashSet<(NodeIndex, NodeIndex)>,
-    /// Space for DFS operations on the document graph.
-    space: DfsSpace<NodeIndex, <StableDiGraph<Document, ()> as Visitable>::Map>,
-}
-
-/// Represents the type for progress callbacks.
-type ProgressCallback = dyn Fn(ProgressKind, usize, usize) + Send + Sync;
-
-/// Represents a request to perform analysis.
-///
-/// This request is sent to the analysis queue for processing.
-struct AnalysisRequest {
-    /// The identifiers of the documents to analyze.
-    documents: Vec<Arc<DocumentId>>,
-    /// The progress callback to use for the request.
-    progress: Option<Box<ProgressCallback>>,
-    /// The sender for completing the analysis request.
-    completed: oneshot::Sender<Vec<AnalysisResult>>,
-}
-
-/// Represents the result of an analysis.
-///
-/// Analysis results are cheap to clone.
-#[derive(Debug, Clone)]
-pub struct AnalysisResult {
-    /// The id of the analyzed document.
-    id: Arc<DocumentId>,
-    /// The root node of the document.
-    ///
-    /// This is `None` if the document failed to be read.
-    root: Option<GreenNode>,
-    /// The error encountered when trying to read the document.
-    ///
-    /// This is `None` if the document was read.
-    error: Option<Arc<anyhow::Error>>,
-    /// The diagnostics for the document.
-    diagnostics: Arc<[Diagnostic]>,
-    /// The scope of the analyzed document.
-    scope: Arc<DocumentScope>,
-}
-
-impl AnalysisResult {
-    /// Constructs a new analysis result for the given document.
-    pub(crate) fn new(document: &Document) -> Self {
-        let state = document.state.completed();
-        Self {
-            id: document.id.clone(),
-            root: document.root.clone(),
-            error: document.error.clone(),
-            diagnostics: state.diagnostics.clone(),
-            scope: state.scope.clone(),
-        }
-    }
-
-    /// Gets the identifier of the document that was analyzed.
-    pub fn id(&self) -> &DocumentId {
-        &self.id
-    }
-
-    /// Gets the root node of the document that was analyzed.
-    ///
-    /// Returns `None` if the document could not be read.
-    pub fn root(&self) -> Option<&GreenNode> {
-        self.root.as_ref()
-    }
-
-    /// Gets the error if the document could not be read.
-    ///
-    /// Returns `None` if the document was read.
-    pub fn error(&self) -> Option<&anyhow::Error> {
-        self.error.as_deref()
-    }
-
-    /// Gets the diagnostics associated with the document.
-    pub fn diagnostics(&self) -> &[Diagnostic] {
-        &self.diagnostics
-    }
-
-    /// Gets the scope of the analyzed document.
-    pub fn scope(&self) -> &DocumentScope {
-        &self.scope
-    }
-}
-
-/// Represents a Workflow Description Language (WDL) analysis engine.
-///
-/// By default, analysis parses documents, performs validation checks, resolves
-/// imports, and performs type checking.
-///
-/// Each analysis operation is processed in order of request; however, the
-/// individual parsing, resolution, and analysis of documents is performed
-/// across a thread pool.
-#[derive(Debug)]
-pub struct AnalysisEngine {
-    /// The document graph.
-    graph: Arc<RwLock<DocumentGraph>>,
-    /// The sender for sending analysis requests.
-    sender: UnboundedSender<AnalysisRequest>,
-    /// The join handle of the queue task.
-    queue: JoinHandle<()>,
-}
-
-impl AnalysisEngine {
-    /// Creates a new analysis engine using a default validator.
-    ///
-    /// The engine must be constructed from the context of a Tokio runtime.
-    pub fn new() -> Result<Self> {
-        let graph: Arc<RwLock<DocumentGraph>> = Default::default();
-        let (sender, queue) = Self::spawn_analysis_queue_task(graph.clone(), None);
-        Ok(Self {
-            graph,
-            sender,
-            queue,
-        })
-    }
-
-    /// Creates a new analysis engine with the given function that produces a
-    /// validator to use.
-    ///
-    /// The provided function will be called once per worker thread to
-    /// initialize a thread-local validator.
-    ///
-    /// The engine must be constructed from the context of a Tokio runtime.
-    pub fn new_with_validator<V>(validator: V) -> Result<Self>
-    where
-        V: Fn() -> Validator + Send + Sync + 'static,
-    {
-        let graph: Arc<RwLock<DocumentGraph>> = Default::default();
-        let (sender, queue) =
-            Self::spawn_analysis_queue_task(graph.clone(), Some(Arc::new(validator)));
-        Ok(Self {
-            graph,
-            sender,
-            queue,
-        })
-    }
-
-    /// Analyzes the given file system path.
-    ///
-    /// If the path is a directory, the directory will be recursively searched
-    /// for files with a `.wdl` extension to analyze.
-    ///
-    /// Otherwise, a single file is analyzed.
-    pub async fn analyze(&self, path: &Path) -> Vec<AnalysisResult> {
-        let documents = Self::find_documents(path).await;
-        if documents.is_empty() {
-            log::info!(
-                "no WDL documents were found for path `{path}`",
-                path = path.display()
-            );
-            return Vec::new();
-        }
-
-        let (tx, rx) = oneshot::channel();
-        self.sender
-            .send(AnalysisRequest {
-                documents,
-                progress: None,
-                completed: tx,
-            })
-            .expect("failed to send analysis request");
-
-        rx.await.expect("failed to receive analysis results")
-    }
-
-    /// Analyzes the given file system path and reports progress to the given
-    /// callback.
-    ///
-    /// If the path is a directory, the directory will be recursively searched
-    /// for files with a `.wdl` extension to analyze.
-    ///
-    /// Otherwise, a single file is analyzed.
-    ///
-    /// Progress is reported to the provided callback function with a minimum
-    /// 50ms interval.
-    pub async fn analyze_with_progress<F>(&self, path: &Path, progress: F) -> Vec<AnalysisResult>
-    where
-        F: Fn(ProgressKind, usize, usize) + Send + Sync + 'static,
-    {
-        let documents = Self::find_documents(path).await;
-        if documents.is_empty() {
-            log::info!(
-                "no WDL documents were found for path `{path}`",
-                path = path.display()
-            );
-            return Vec::new();
-        }
-
-        let (tx, rx) = oneshot::channel();
-        self.sender
-            .send(AnalysisRequest {
-                documents,
-                progress: Some(Box::new(progress)),
-                completed: tx,
-            })
-            .expect("failed to send analysis request");
-
-        rx.await.expect("failed to receive analysis results")
-    }
-
-    /// Gets a previous analysis result for a file.
-    ///
-    /// Returns `None` if the file has not been analyzed yet.
-    pub fn result(&self, path: &Path) -> Option<AnalysisResult> {
-        let id = DocumentId::try_from(path).ok()?;
-        let graph = self.graph.read();
-        let index = graph.indexes.get(&id)?;
-        Some(AnalysisResult::new(&graph.inner[*index]))
-    }
-
-    /// Shuts down the engine and waits for outstanding requests to complete.
-    pub async fn shutdown(self) {
-        drop(self.sender);
-        self.queue.await.expect("expected the queue to shut down");
-    }
-
-    /// Spawns the analysis queue task.
-    fn spawn_analysis_queue_task(
-        graph: Arc<RwLock<DocumentGraph>>,
-        validator: Option<Arc<dyn Fn() -> Validator + Send + Sync>>,
-    ) -> (UnboundedSender<AnalysisRequest>, JoinHandle<()>) {
-        let (tx, rx) = unbounded_channel::<AnalysisRequest>();
-        let handle = tokio::spawn(Self::process_analysis_queue(graph, validator, rx));
-        (tx, handle)
-    }
-
-    /// Processes the analysis queue.
-    ///
-    /// The queue task processes analysis requests in the order of insertion
-    /// into the queue.
-    ///
-    /// It is also the only writer to the shared document graph.
-    async fn process_analysis_queue(
-        graph: Arc<RwLock<DocumentGraph>>,
-        validator: Option<Arc<dyn Fn() -> Validator + Send + Sync>>,
-        mut receiver: UnboundedReceiver<AnalysisRequest>,
-    ) {
-        log::info!("analysis queue has started");
-
-        let client = Client::default();
-        while let Some(request) = receiver.recv().await {
-            log::info!(
-                "received request to analyze {count} document(s)",
-                count = request.documents.len()
-            );
-
-            // We start by populating the parse set with the request documents
-            // After each parse set completes, we search for imports to add to the parse set
-            // and continue until the parse set is empty; once the graph is built, we spawn
-            // analysis tasks to process every node in the graph.
-            let start = Instant::now();
-            let mut state = State::default();
-            let mut parse_set = request.documents.into_iter().collect::<HashSet<_>>();
-            let mut requested = true;
-            let handle = Handle::current();
-            while !parse_set.is_empty() {
-                let tasks = parse_set
-                    .iter()
-                    .map(|id| {
-                        Self::spawn_parse_task(&handle, &client, &validator, id.clone(), requested)
-                    })
-                    .collect::<FuturesUnordered<_>>();
-
-                // The remaining files to parse were not part of the request
-                requested = false;
-
-                let parsed = Self::await_with_progress::<_, _, Vec<_>>(
-                    ProgressKind::Parsing,
-                    tasks,
-                    &request.progress,
-                )
-                .await;
-
-                parse_set.clear();
-                (state, parse_set) = Self::add_import_dependencies(state, parsed, parse_set).await;
-            }
-
-            let total = state.graph.inner.node_count();
-            let state = Self::spawn_analysis_tasks(state, &request.progress).await;
-
-            // Spawn a task for merging the graph as this takes a lock
-            let graph = graph.clone();
-            let results = RayonHandle::spawn(move || {
-                log::info!("merging document graphs");
-                let mut graph = graph.write();
-                graph.merge(state.graph)
-            })
-            .await;
-
-            log::info!(
-                "analysis request completed with {total} document(s) analyzed in {elapsed:?}",
-                elapsed = start.elapsed()
-            );
-
-            request
-                .completed
-                .send(results)
-                .expect("failed to send analysis results");
-        }
-
-        log::info!("analysis queue has shut down");
-    }
-
-    /// Finds documents for the given path.
-    ///
-    /// If the path is a directory, it is searched for `.wdl` files.
-    ///
-    /// Otherwise, returns a single identifier for the given path.
-    async fn find_documents(path: &Path) -> Vec<Arc<DocumentId>> {
-        if path.is_dir() {
-            let pattern = format!("{path}/**/*.wdl", path = path.display());
-            return RayonHandle::spawn(move || {
-                let options = glob::MatchOptions {
-                    case_sensitive: true,
-                    require_literal_separator: false,
-                    require_literal_leading_dot: true,
-                };
-
-                match glob::glob_with(&pattern, options) {
-                    Ok(paths) => paths
-                        .filter_map(|p| match p {
-                            Ok(path) => Some(Arc::new(DocumentId::try_from(path.as_path()).ok()?)),
-                            Err(e) => {
-                                log::error!("error while searching for WDL documents: {e}");
-                                None
-                            }
-                        })
-                        .collect(),
-                    Err(e) => {
-                        log::error!("error while searching for WDL documents: {e}");
-                        Vec::new()
-                    }
-                }
-            })
-            .await;
-        }
-
-        DocumentId::try_from(path)
-            .map(|id| vec![Arc::new(id)])
-            .unwrap_or_default()
-    }
-
-    /// Awaits the given set of futures while providing progress to the given
-    /// callback.
-    async fn await_with_progress<T, C>(
-        kind: ProgressKind,
-        tasks: FuturesUnordered<T>,
-        progress: &Option<Box<ProgressCallback>>,
-    ) -> C
-    where
-        T: Future,
-        C: Extend<T::Output> + Default,
-    {
-        if tasks.is_empty() {
-            return Default::default();
-        }
-
-        let total = tasks.len();
-        if let Some(progress) = &progress {
-            progress(kind, 0, total);
-        }
-
-        let mut completed = 0;
-        let mut last_progress = Instant::now();
-        let collection = tasks
-            .map(|r| {
-                completed += 1;
-
-                if let Some(progress) = progress {
-                    let now = Instant::now();
-                    if completed < total
-                        && (now - last_progress).as_millis() > MINIMUM_PROGRESS_MILLIS
-                    {
-                        log::info!("{completed} out of {total} {kind} task(s) have completed");
-                        last_progress = now;
-                        progress(kind, completed, total);
-                    }
-                }
-
-                r
-            })
-            .collect()
-            .await;
-
-        log::info!("{total} {kind} task(s) have completed");
-        if let Some(progress) = &progress {
-            progress(kind, total, total);
-        }
-
-        collection
-    }
-
-    /// Spawns a parse task on a rayon thread.
-    fn spawn_parse_task(
-        handle: &Handle,
-        client: &Client,
-        validator: &Option<Arc<dyn Fn() -> Validator + Send + Sync>>,
-        id: Arc<DocumentId>,
-        requested: bool,
-    ) -> RayonHandle<Document> {
-        thread_local! {
-            static VALIDATOR: RefCell<Option<Validator>> = const { RefCell::new(None) };
-        }
-
-        let handle = handle.clone();
-        let client = client.clone();
-        let validator = validator.clone();
-        RayonHandle::spawn(move || {
-            VALIDATOR.with_borrow_mut(|v| {
-                let validator = v.get_or_insert_with(|| validator.map(|v| v()).unwrap_or_default());
-                match Self::parse(&handle, &client, Some(validator), &id) {
-                    Ok((root, diagnostics)) => {
-                        Document::from_parse(id, root, diagnostics, requested)
-                    }
-                    Err(e) => {
-                        log::warn!("{e:#}");
-                        Document::from_error(id, e, requested)
-                    }
-                }
-            })
-        })
-    }
-
-    /// Parses the given document by URI.
-    ///
-    /// If the URI is `http` or `https` scheme, it fetches the source from the
-    /// network.
-    ///
-    /// If the URI is `file` scheme, it reads the file from the local file
-    /// system.
-    ///
-    /// Returns the root node and diagnostics upon success or a single document
-    /// if there was a problem with accessing the document's source.
-    fn parse(
-        tokio: &Handle,
-        client: &Client,
-        validator: Option<&mut Validator>,
-        id: &DocumentId,
-    ) -> Result<(GreenNode, Vec<Diagnostic>)> {
-        let source = match id {
-            DocumentId::Path(path) => fs::read_to_string(path)?,
-            DocumentId::Uri(uri) => match uri.scheme() {
-                "https" | "http" => Self::download_source(tokio, client, uri)?,
-                "file" => {
-                    let path = uri
-                        .to_file_path()
-                        .map_err(|_| anyhow!("invalid file URI `{uri}`"))?;
-                    log::info!("reading document `{path}`", path = path.display());
-                    fs::read_to_string(&path)?
-                }
-                scheme => {
-                    bail!("unsupported URI scheme `{scheme}`");
-                }
-            },
-        };
-
-        let (node, diagnostics) = Self::parse_source(id, &source, validator);
-        Ok((node, diagnostics))
-    }
-
-    /// Parses the given source and validates the result with the given
-    /// validator.
-    fn parse_source(
-        id: &DocumentId,
-        source: &str,
-        validator: Option<&mut Validator>,
-    ) -> (GreenNode, Vec<Diagnostic>) {
-        let start = Instant::now();
-        let (document, mut diagnostics) = wdl_ast::Document::parse(source);
-
-        if diagnostics.is_empty() {
-            if let Some(validator) = validator {
-                diagnostics.extend(validator.validate(&document).err().unwrap_or_default());
-            }
-        }
-
-        log::info!("parsing of `{id}` completed in {:?}", start.elapsed());
-        (document.syntax().green().into(), diagnostics)
-    }
-
-    /// Downloads the source of a `http` or `https` scheme URI.
-    ///
-    /// This makes a request on the provided tokio runtime to download the
-    /// source.
-    fn download_source(tokio: &Handle, client: &Client, uri: &Url) -> Result<String> {
-        /// The timeout for downloading the source, in seconds.
-        const TIMEOUT_IN_SECS: u64 = 30;
-
-        log::info!("downloading source from `{uri}`");
-
-        // TODO: we should be caching these responses on disk somewhere
-        tokio.block_on(async {
-            let resp = client
-                .get(uri.as_str())
-                .timeout(Duration::from_secs(TIMEOUT_IN_SECS))
-                .send()
-                .await?;
-
-            let code = resp.status();
-            if !code.is_success() {
-                bail!("server returned HTTP status {code}");
-            }
-
-            resp.text().await.context("failed to read response body")
-        })
-    }
-
-    /// Adds import dependencies of parsed documents to the state.
-    ///
-    /// This will add empty nodes to the graph for any missing imports and
-    /// populate the parse set with documents that need to be parsed.
-    async fn add_import_dependencies(
-        mut state: State,
-        parsed: Vec<Document>,
-        mut parse_set: HashSet<Arc<DocumentId>>,
-    ) -> (State, HashSet<Arc<DocumentId>>) {
-        RayonHandle::spawn(move || {
-            for document in parsed {
-                // Add the newly parsed document to the graph; if the document was previously
-                // added as an import dependency, it is replaced with the newly parsed document
-                let id = document.id.clone();
-                state.graph.add_document(document);
-
-                let (doc_index, document) = state
-                    .graph
-                    .document(&id)
-                    .expect("document was just added to the state");
-                let root = match &document.root {
-                    Some(root) => root,
-                    None => continue,
-                };
-
-                match wdl_ast::Document::cast(SyntaxNode::new_root(root.clone()))
-                    .expect("root should cast")
-                    .ast()
-                {
-                    Ast::Unsupported => {}
-                    Ast::V1(ast) => {
-                        for import in ast.imports() {
-                            let text = match import.uri().text() {
-                                Some(text) => text,
-                                None => continue,
-                            };
-
-                            let import_id = match DocumentId::relative_to(&id, text.as_str()) {
-                                Ok(id) => Arc::new(id),
-                                Err(_) => continue,
-                            };
-
-                            match state.graph.document(&import_id) {
-                                Some((dep_index, _)) => {
-                                    // The dependency is already in the graph, so add a dependency
-                                    // edge; however, we must detect a cycle before doing so
-                                    if has_path_connecting(
-                                        &state.graph.inner,
-                                        doc_index,
-                                        dep_index,
-                                        Some(&mut state.space),
-                                    ) {
-                                        // Adding the edge would cause a cycle, so record the cycle
-                                        // instead
-                                        log::info!(
-                                            "an import cycle was detected between `{id}` and \
-                                             `{import_id}`"
-                                        );
-                                        state.cycles.insert((doc_index, dep_index));
-                                    } else {
-                                        // The edge won't cause a cycle, so add it
-                                        log::info!(
-                                            "updating dependency edge from `{id}` to `{import_id}`"
-                                        );
-                                        state.graph.inner.update_edge(dep_index, doc_index, ());
-                                    }
-                                }
-                                None => {
-                                    // The dependency isn't in the graph; add a new node and
-                                    // dependency edge
-                                    log::info!(
-                                        "updating dependency edge from `{id}` to `{import_id}` \
-                                         (added to parse queue)"
-                                    );
-                                    let dep_index = state
-                                        .graph
-                                        .add_document(Document::new(import_id.clone(), false));
-                                    state.graph.inner.update_edge(dep_index, doc_index, ());
-                                    parse_set.insert(import_id);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            (state, parse_set)
-        })
-        .await
-    }
-
-    /// Spawns analysis tasks.
-    ///
-    /// Analysis tasks are spawned in topological order.
-    async fn spawn_analysis_tasks(state: State, progress: &Option<Box<ProgressCallback>>) -> State {
-        // As we're going to be analyzing on multiple threads, wrap the state with a
-        // `RwLock`.
-        let mut state = Arc::new(RwLock::new(state));
-        let mut remaining: Option<StableDiGraph<Arc<DocumentId>, ()>> = None;
-        let mut set = Vec::new();
-        while remaining
-            .as_ref()
-            .map(|g| g.node_count() > 0)
-            .unwrap_or(true)
-        {
-            (state, remaining, set) = RayonHandle::spawn(move || {
-                // Insert a copy of the graph where we just map the nodes to the document
-                // identifiers; we need a copy as we are going to be removing nodes from the
-                // graph as we process them in topological order
-                let g = remaining.get_or_insert_with(|| {
-                    state.read().graph.inner.map(|_, n| n.id.clone(), |_, _| ())
-                });
-
-                // Build a set of nodes with no incoming edges
-                set.clear();
-                for node in g.node_indices() {
-                    if g.edges_directed(node, Direction::Incoming).next().is_none() {
-                        set.push(node);
-                    }
-                }
-
-                // Remove the nodes we're about to analyze from the "remaining" graph
-                // This also removes the outgoing edges from those nodes
-                for index in &set {
-                    g.remove_node(*index);
-                }
-
-                (state, remaining, set)
-            })
-            .await;
-
-            let tasks = set
-                .iter()
-                .map(|index| {
-                    let index = *index;
-                    let state = state.clone();
-                    RayonHandle::spawn(move || Self::analyze_node(state, index))
-                })
-                .collect::<FuturesUnordered<_>>();
-
-            Self::await_with_progress::<_, _, Vec<_>>(ProgressKind::Analyzing, tasks, progress)
-                .await;
-        }
-
-        // We're finished with the tasks; there should be no outstanding references to
-        // the state
-        Arc::into_inner(state)
-            .expect("only one reference should remain")
-            .into_inner()
-    }
-
-    /// Analyzes a node in the document graph.
-    ///
-    /// This completes the analysis state of the node.
-    fn analyze_node(state: Arc<RwLock<State>>, index: NodeIndex) {
-        let (id, root) = {
-            // scope for read lock
-            let state = state.read();
-            let node = &state.graph.inner[index];
-            (node.id.clone(), node.root.clone())
-        };
-
-        log::info!("analyzing `{id}`");
-        let start = Instant::now();
-        let (scope, diagnostics) = if let Some(root) = root {
-            let document =
-                wdl_ast::Document::cast(SyntaxNode::new_root(root)).expect("root should cast");
-            let state = state.read();
-            DocumentScope::new(&state, &id, &document)
-        } else {
-            (Default::default(), Default::default())
-        };
-
-        {
-            // Scope for write lock
-            // Write the result of the analysis to the document
-            let mut state = state.write();
-            let doc = &mut state.graph.inner[index];
-            let state = doc.state.in_progress();
-
-            state.scope = scope;
-            if !diagnostics.is_empty() {
-                state.diagnostics.extend(diagnostics);
-            }
-
-            // Complete the analysis of the document
-            doc.complete();
-        }
-
-        log::info!(
-            "analysis of `{id}` completed in {elapsed:?}",
-            elapsed = start.elapsed()
-        )
-    }
-}
-
-/// Constant that asserts `AnalysisEngine` is `Send + Sync`; if not, it fails to
-/// compile.
-const _: () = {
-    /// Helper that will fail to compile if T is not `Send + Sync`.
-    const fn _assert<T: Send + Sync>() {}
-    _assert::<AnalysisEngine>();
-};

From de613475053ec91690e2032f4d1b597787b3ed4c Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Mon, 14 Oct 2024 14:21:00 -0400
Subject: [PATCH 43/60] Apply suggestions from code review

Co-authored-by: Peter Huene
Co-authored-by: Clay McLeod <3411613+claymcleod@users.noreply.github.com>
---
 wdl-ast/src/lib.rs        | 14 --------------
 wdl-ast/src/v1/decls.rs   |  8 ++++----
 wdl-ast/src/v1/expr.rs    |  2 +-
 wdl-ast/src/v1/import.rs  |  2 +-
 wdl-format/Cargo.toml     |  2 +-
 wdl-format/src/config.rs  |  2 +-
 wdl-format/src/element.rs | 13 ++++++++-----
 7 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs
index a04c8ff3f..6b6e06371 100644
--- a/wdl-ast/src/lib.rs
+++ b/wdl-ast/src/lib.rs
@@ -204,20 +204,6 @@ pub fn token_children(parent: &SyntaxNode) -> impl Iterator
-    fn children(&self) -> impl Iterator;
-}
-
-impl<T: AstNode<Language = WorkflowDescriptionLanguage>> AstNodeChildrenExt for T {
-    fn children(&self) -> impl Iterator {
-        self.syntax()
-            .clone()
-            .children_with_tokens()
-            .map(|c| Token::cast(c).expect("element to cast to an AST element"))
-    }
-}
diff --git a/wdl-ast/src/v1/decls.rs b/wdl-ast/src/v1/decls.rs
index 8053cdbb7..f35d35cc5 100644
--- a/wdl-ast/src/v1/decls.rs
+++ b/wdl-ast/src/v1/decls.rs
@@ -726,7 +726,7 @@ impl Type {
         }
     }
 
-    /// Finds the first child that can be cast to an [`Type`].
+    /// Finds the first child that can be cast to a [`Type`].
     ///
     /// This is meant to emulate the functionality of
     /// [`rowan::ast::support::child`] without requiring [`Type`] to implement
@@ -735,7 +735,7 @@ impl Type {
         syntax.children().find_map(Self::cast)
     }
 
-    /// Finds all children that can be cast to an [`Type`].
+    /// Finds all children that can be cast to a [`Type`].
     ///
     /// This is meant to emulate the functionality of
     /// [`rowan::ast::support::children`] without requiring [`Type`] to
@@ -987,7 +987,7 @@ impl Decl {
         }
     }
 
-    /// Finds the first child that can be cast to an [`Decl`].
+    /// Finds the first child that can be cast to a [`Decl`].
    ///
     /// This is meant to emulate the functionality of
     /// [`rowan::ast::support::child`] without requiring [`Decl`] to implement
@@ -996,7 +996,7 @@ impl Decl {
         syntax.children().find_map(Self::cast)
     }
 
-    /// Finds all children that can be cast to an [`Decl`].
+    /// Finds all children that can be cast to a [`Decl`].
     ///
     /// This is meant to emulate the functionality of
     /// [`rowan::ast::support::children`] without requiring [`Decl`] to
diff --git a/wdl-ast/src/v1/expr.rs b/wdl-ast/src/v1/expr.rs
index 5734aa22e..cdd36a238 100644
--- a/wdl-ast/src/v1/expr.rs
+++ b/wdl-ast/src/v1/expr.rs
@@ -169,7 +169,7 @@ impl Expr {
                 ModuloExpr::cast(syntax).expect("modulo expr should cast"),
             )),
             SyntaxKind::ExponentiationExprNode => Some(Self::Exponentiation(
-                ExponentiationExpr::cast(syntax).expect("exponentation expr should cast"),
+                ExponentiationExpr::cast(syntax).expect("exponentiation expr should cast"),
             )),
             SyntaxKind::CallExprNode => Some(Self::Call(
                 CallExpr::cast(syntax).expect("call expr should cast"),
diff --git a/wdl-ast/src/v1/import.rs b/wdl-ast/src/v1/import.rs
index 5d54f90d8..98bf70cfb 100644
--- a/wdl-ast/src/v1/import.rs
+++ b/wdl-ast/src/v1/import.rs
@@ -35,7 +35,7 @@ impl ImportStatement {
         child(&self.0).expect("import should have a URI")
     }
 
-    /// Gets the `import`` keyword of the import statement.
+    /// Gets the `import` keyword of the import statement.
     pub fn keyword(&self) -> ImportKeyword {
         token(&self.0).expect("import should have a keyword")
     }
diff --git a/wdl-format/Cargo.toml b/wdl-format/Cargo.toml
index b26d7e086..532b7b66f 100644
--- a/wdl-format/Cargo.toml
+++ b/wdl-format/Cargo.toml
@@ -12,7 +12,7 @@ wdl-ast = { path = "../wdl-ast", version = "0.7.1", features = ["codespan"] }
 nonempty.workspace = true
 
 [dev-dependencies]
-pretty_assertions = { workspace = true }
+pretty_assertions.workspace = true
 approx = { workspace = true }
 rayon = { workspace = true }
 colored = { workspace = true }
diff --git a/wdl-format/src/config.rs b/wdl-format/src/config.rs
index 15ba2e5de..942715f9b 100644
--- a/wdl-format/src/config.rs
+++ b/wdl-format/src/config.rs
@@ -1,4 +1,4 @@
-//! Configuration for formatting.
+//! Formatting configuration.
 
 mod builder;
 mod indent;
diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs
index 1a8545367..77f1d597a 100644
--- a/wdl-format/src/element.rs
+++ b/wdl-format/src/element.rs
@@ -37,7 +37,7 @@ where
     I: Iterator,
 {
     fn drop(&mut self) {
-        assert!(self.0.peek().is_none(), "not all iterator items consumed!");
+        assert!(self.0.peek().is_none(), "not all iterator items were consumed!");
     }
 }
 
@@ -66,6 +66,9 @@ impl FormatElement {
     pub fn children(&self) -> Option<AssertConsumedIter<impl Iterator<Item = &FormatElement>>> {
         self.children
             .as_ref()
+            // NOTE: we wrap the iterator in an [`AssertConsumedIter`] to ensure
+            // that no children are ever forgotten to be formatted (they must be
+            // explicitly consumed and dropped).
             .map(|children| AssertConsumedIter::new(children.iter().map(|c| c.as_ref())))
     }
 }
@@ -158,7 +161,7 @@ workflow bar # This is an inline comment on the workflow ident.
         let format_element = Node::Ast(document).into_format_element();
         let mut children = format_element.children().unwrap();
 
-        // Version statement
+        // Version statement.
         let version = children.next().expect("version statement element");
 
         assert_eq!(
@@ -176,7 +179,7 @@
             SyntaxKind::Version
         );
 
-        // Task Definition
+        // Task Definition.
         let task = children.next().expect("task element");
 
         assert_eq!(
@@ -206,7 +209,7 @@
 
         assert!(task_children.next().is_none());
 
-        // Workflow Definition
+        // Workflow Definition.
         let workflow = children.next().expect("workflow element");
 
         assert_eq!(
@@ -244,7 +247,7 @@
 
     #[test]
     #[should_panic]
-    fn unconsumed_children_panic() {
+    fn unconsumed_children_nodes_panic() {
         let (document, diagnostics) = Document::parse(
             "## WDL version 1.2
 # This is a comment attached to the version.
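The NOTE added to `children` in the hunk above captures the design invariant: a formatter must explicitly consume every child element, and forgetting any of them should fail loudly rather than silently drop tokens. Below is a self-contained Rust sketch of that drop-guard pattern; it is a simplified stand-in for the `AssertConsumedIter` in wdl-format/src/element.rs, not the crate's actual type.

use std::iter::Peekable;

/// Wraps an iterator and panics on drop if any items were left unconsumed.
struct AssertConsumed<I: Iterator>(Peekable<I>);

impl<I: Iterator> AssertConsumed<I> {
    fn new(iter: I) -> Self {
        Self(iter.peekable())
    }
}

impl<I: Iterator> Iterator for AssertConsumed<I> {
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }
}

impl<I: Iterator> Drop for AssertConsumed<I> {
    fn drop(&mut self) {
        // Peeking detects leftovers without consuming them.
        assert!(self.0.peek().is_none(), "not all iterator items were consumed!");
    }
}

fn main() {
    let mut children = AssertConsumed::new(["version", "task", "workflow"].into_iter());
    // Consuming every element keeps the drop assertion satisfied; stopping
    // after "task" would panic when `children` goes out of scope.
    while let Some(child) = children.next() {
        println!("formatting {child}");
    }
}

Because the check lives in `Drop`, even early-return paths through a formatting function are covered by the assertion.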
From a758c8a88fbe7a1aabcb229465cf55fe513eb6eb Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 14:22:09 -0400 Subject: [PATCH 44/60] chore: cargo fmt --- wdl-ast/src/lib.rs | 1 - wdl-format/src/element.rs | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/wdl-ast/src/lib.rs index 6b6e06371..0e600a6c9 100644 --- a/wdl-ast/src/lib.rs +++ b/wdl-ast/src/lib.rs @@ -204,7 +204,6 @@ pub fn token_children(parent: &SyntaxNode) -> impl Iterator Date: Mon, 14 Oct 2024 14:32:24 -0400 Subject: [PATCH 45/60] chore: revert placeholder changes --- wdl-analysis/src/types/v1.rs | 22 ++++++++----------- wdl-ast/src/v1/expr.rs | 6 ++--- .../rules/deprecated_placeholder_option.rs | 2 +- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/wdl-analysis/src/types/v1.rs index 5af8a298e..06bdfffd9 100644 --- a/wdl-analysis/src/types/v1.rs +++ b/wdl-analysis/src/types/v1.rs @@ -504,20 +504,16 @@ where // Check for a sep option is specified; if so, accept `Array[P]` where `P` is // primitive. let mut coercible = false; - - for option in placeholder.options() { - if let PlaceholderOption::Sep(_) = option { - if let Type::Compound(c) = ty { - if let CompoundTypeDef::Array(a) = - self.types.type_definition(c.definition()) + if let Some(PlaceholderOption::Sep(_)) = placeholder.option() { + if let Type::Compound(c) = ty { + if let CompoundTypeDef::Array(a) = + self.types.type_definition(c.definition()) + { + if !a.element_type().is_optional() + && a.element_type().as_primitive().is_some() { - if !a.element_type().is_optional() - && a.element_type().as_primitive().is_some() - { - // OK - coercible = true; - break; - } + // OK + coercible = true; } } } diff --git a/wdl-ast/src/v1/expr.rs index cdd36a238..8d492b736 100644 --- a/wdl-ast/src/v1/expr.rs +++ b/wdl-ast/src/v1/expr.rs @@ -2245,9 +2245,9 @@ impl Placeholder { .expect("should have a placeholder open token") } - /// Gets the options for the placeholder. - pub fn options(&self) -> impl Iterator<Item = PlaceholderOption> { - PlaceholderOption::children(&self.0) + /// Gets the option for the placeholder. + pub fn option(&self) -> Option<PlaceholderOption> { + child(&self.0) } /// Gets the placeholder expression.
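With at most one option permitted per placeholder, call sites move from iterating to matching on a single `Option`, as the lint-rule diff below shows. A hedged sketch of the resulting call-site shape (`Sep` and `Default` appear in the diffs; any further variant names, such as `TrueFalse`, are assumptions):

use wdl_ast::v1::{Placeholder, PlaceholderOption};

/// Describes the option attached to a placeholder, if any.
fn describe_option(placeholder: &Placeholder) -> Option<&'static str> {
    // `option()` replaces the old `options()` iterator: the grammar permits
    // at most one option per placeholder, so a single `Option` models it exactly.
    placeholder.option().map(|option| match option {
        PlaceholderOption::Sep(_) => "sep",
        PlaceholderOption::Default(_) => "default",
        PlaceholderOption::TrueFalse(_) => "true/false",
    })
}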
diff --git a/wdl-lint/src/rules/deprecated_placeholder_option.rs b/wdl-lint/src/rules/deprecated_placeholder_option.rs index 3cd12c9bf..56951b3a7 100644 --- a/wdl-lint/src/rules/deprecated_placeholder_option.rs +++ b/wdl-lint/src/rules/deprecated_placeholder_option.rs @@ -142,7 +142,7 @@ impl Visitor for DeprecatedPlaceholderOptionRule { _ => return, }; - for option in placeholder.options() { + if let Some(option) = placeholder.option() { let diagnostic = match option { PlaceholderOption::Sep(option) => deprecated_sep_placeholder_option(option.span()), PlaceholderOption::Default(option) => { From bf631d875d870603a72153c519359da0f30d09fe Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 14:45:54 -0400 Subject: [PATCH 46/60] chore: revert bad hints changes --- wdl-ast/src/v1/task.rs | 50 ++++++----------------------- wdl-ast/src/v1/workflow.rs | 16 ++++----- wdl-lint/src/rules/section_order.rs | 4 +-- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/wdl-ast/src/v1/task.rs b/wdl-ast/src/v1/task.rs index 77088e27b..f3b0526cc 100644 --- a/wdl-ast/src/v1/task.rs +++ b/wdl-ast/src/v1/task.rs @@ -23,7 +23,6 @@ use crate::support; use crate::support::child; use crate::support::children; use crate::token; -use crate::v1::WorkflowHintsSection; pub mod common; pub mod requirements; @@ -127,9 +126,7 @@ pub enum TaskItem { /// The item is a requirements section. Requirements(RequirementsSection), /// The item is a task hints section. - TaskHints(TaskHintsSection), - /// The item is a workflow hints section. - WorkflowHints(WorkflowHintsSection), + Hints(TaskHintsSection), /// The item is a runtime section. Runtime(RuntimeSection), /// The item is a metadata section. @@ -189,12 +186,9 @@ impl TaskItem { SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), )), - SyntaxKind::TaskHintsSectionNode => Some(Self::TaskHints( + SyntaxKind::TaskHintsSectionNode => Some(Self::Hints( TaskHintsSection::cast(syntax).expect("task hints section to cast"), )), - SyntaxKind::WorkflowHintsSectionNode => Some(Self::WorkflowHints( - WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"), - )), SyntaxKind::BoundDeclNode => Some(Self::Declaration( BoundDecl::cast(syntax).expect("bound decl to cast"), )), @@ -209,8 +203,7 @@ impl TaskItem { Self::Output(element) => element.syntax(), Self::Command(element) => element.syntax(), Self::Requirements(element) => element.syntax(), - Self::TaskHints(element) => element.syntax(), - Self::WorkflowHints(element) => element.syntax(), + Self::Hints(element) => element.syntax(), Self::Runtime(element) => element.syntax(), Self::Metadata(element) => element.syntax(), Self::ParameterMetadata(element) => element.syntax(), @@ -317,49 +310,24 @@ impl TaskItem { /// Attempts to get a reference to the inner [`TaskHintsSection`]. /// - /// * If `self` is a [`TaskItem::TaskHints`], then a reference to the inner + /// * If `self` is a [`TaskItem::Hints`], then a reference to the inner /// [`TaskHintsSection`] is returned wrapped in [`Some`]. /// * Else, [`None`] is returned. - pub fn as_task_hints_section(&self) -> Option<&TaskHintsSection> { + pub fn as_hints_section(&self) -> Option<&TaskHintsSection> { match self { - Self::TaskHints(task_hints_section) => Some(task_hints_section), + Self::Hints(hints_section) => Some(hints_section), _ => None, } } /// Consumes `self` and attempts to return the inner [`TaskHintsSection`]. 
/// - /// * If `self` is a [`TaskItem::TaskHints`], then the inner + /// * If `self` is a [`TaskItem::Hints`], then the inner /// [`TaskHintsSection`] is returned wrapped in [`Some`]. /// * Else, [`None`] is returned. - pub fn into_task_hints_section(self) -> Option<TaskHintsSection> { - match self { - Self::TaskHints(task_hints_section) => Some(task_hints_section), - _ => None, - } - } - - /// Attempts to get a reference to the inner [`WorkflowHintsSection`]. - /// - /// * If `self` is a [`WorkflowItem::WorkflowHints`], then a reference to - /// the inner [`WorkflowHintsSection`] is returned wrapped in [`Some`]. - /// * Else, [`None`] is returned. - pub fn as_workflow_hints_section(&self) -> Option<&WorkflowHintsSection> { - match self { - Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section), - _ => None, - } - } - - /// Consumes `self` and attempts to return the inner - /// [`WorkflowHintsSection`]. - /// - /// * If `self` is a [`WorkflowItem::WorkflowHints`], then the inner - /// [`WorkflowHintsSection`] is returned wrapped in [`Some`]. - /// * Else, [`None`] is returned. - pub fn into_workflow_hints_section(self) -> Option<WorkflowHintsSection> { + pub fn into_hints_section(self) -> Option<TaskHintsSection> { match self { - Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section), + Self::Hints(hints_section) => Some(hints_section), _ => None, } } diff --git a/wdl-ast/src/v1/workflow.rs index 09aac56c1..8e04e7fc1 100644 --- a/wdl-ast/src/v1/workflow.rs +++ b/wdl-ast/src/v1/workflow.rs @@ -116,7 +116,7 @@ pub enum WorkflowItem { /// The item is a parameter meta section. ParameterMetadata(ParameterMetadataSection), /// The item is a workflow hints section. - WorkflowHints(WorkflowHintsSection), + Hints(WorkflowHintsSection), /// The item is a private bound declaration. Declaration(BoundDecl), } @@ -170,7 +170,7 @@ impl WorkflowItem { SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), )), - SyntaxKind::WorkflowHintsSectionNode => Some(Self::WorkflowHints( + SyntaxKind::WorkflowHintsSectionNode => Some(Self::Hints( WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"), )), SyntaxKind::BoundDeclNode => Some(Self::Declaration( @@ -190,7 +190,7 @@ impl WorkflowItem { Self::Call(element) => element.syntax(), Self::Metadata(element) => element.syntax(), Self::ParameterMetadata(element) => element.syntax(), - Self::WorkflowHints(element) => element.syntax(), + Self::Hints(element) => element.syntax(), Self::Declaration(element) => element.syntax(), } } @@ -369,12 +369,12 @@ impl WorkflowItem { /// Attempts to get a reference to the inner [`WorkflowHintsSection`]. /// - /// * If `self` is a [`WorkflowItem::TaskHints`], then a reference to the + /// * If `self` is a [`WorkflowItem::Hints`], then a reference to the /// inner [`WorkflowHintsSection`] is returned wrapped in [`Some`]. /// * Else, [`None`] is returned. - pub fn as_workflow_hints_section(&self) -> Option<&WorkflowHintsSection> { + pub fn as_hints_section(&self) -> Option<&WorkflowHintsSection> { match self { - Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section), + Self::Hints(hints_section) => Some(hints_section), _ => None, } } @@ -385,9 +385,9 @@ impl WorkflowItem { /// * If `self` is a [`WorkflowItem::Hints`], then the inner /// [`WorkflowHintsSection`] is returned wrapped in [`Some`]. /// * Else, [`None`] is returned.
- pub fn into_workflow_hints_section(self) -> Option<WorkflowHintsSection> { + pub fn into_hints_section(self) -> Option<WorkflowHintsSection> { match self { - Self::WorkflowHints(workflow_hints_section) => Some(workflow_hints_section), + Self::Hints(hints_section) => Some(hints_section), _ => None, } } diff --git a/wdl-lint/src/rules/section_order.rs index 7070ad9e9..f4f13bb61 100644 --- a/wdl-lint/src/rules/section_order.rs +++ b/wdl-lint/src/rules/section_order.rs @@ -166,7 +166,7 @@ impl Visitor for SectionOrderingRule { TaskItem::Requirements(_) if encountered <= State::Requirements => { encountered = State::Requirements; } - TaskItem::TaskHints(_) if encountered <= State::Hints => { + TaskItem::Hints(_) if encountered <= State::Hints => { encountered = State::Hints; } _ => { @@ -218,7 +218,7 @@ impl Visitor for SectionOrderingRule { WorkflowItem::Output(_) if encountered <= State::Output => { encountered = State::Output; } - WorkflowItem::WorkflowHints(_) if encountered <= State::Hints => { + WorkflowItem::Hints(_) if encountered <= State::Hints => { encountered = State::Hints; } _ => { From df4f263272858a305b6ed77c3b154d6d315b20d9 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 14:50:42 -0400 Subject: [PATCH 47/60] Update v1.rs --- wdl-analysis/src/eval/v1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-analysis/src/eval/v1.rs index d668ad4d7..60ea05d5b 100644 --- a/wdl-analysis/src/eval/v1.rs +++ b/wdl-analysis/src/eval/v1.rs @@ -167,7 +167,7 @@ impl TaskGraphBuilder { { self.requirements = Some(graph.add_node(TaskGraphNode::Requirements(section))); } - TaskItem::TaskHints(section) + TaskItem::Hints(section) if version >= SupportedVersion::V1(V1::Two) && self.hints.is_none() && self.runtime.is_none() => From 7ddb8a79c3bb03e6f8ca0ce0ba7351731820e183 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 14:56:26 -0400 Subject: [PATCH 48/60] chore: cleanup --- wdl-ast/src/v1/workflow.rs | 4 ++-- wdl-format/src/token.rs | 3 +-- wdl-format/src/token/post.rs | 2 +- wdl-format/src/token/pre.rs | 2 +- .../clays_complex_script/source.formatted.wdl | 22 +++++++++---------- .../format/clays_complex_script/source.wdl | 22 +++++++++---------- 6 files changed, 27 insertions(+), 28 deletions(-) diff --git a/wdl-ast/src/v1/workflow.rs index 8e04e7fc1..78dcb472e 100644 --- a/wdl-ast/src/v1/workflow.rs +++ b/wdl-ast/src/v1/workflow.rs @@ -369,8 +369,8 @@ impl WorkflowItem { /// Attempts to get a reference to the inner [`WorkflowHintsSection`]. /// - /// * If `self` is a [`WorkflowItem::Hints`], then a reference to the - /// inner [`WorkflowHintsSection`] is returned wrapped in [`Some`]. + /// * If `self` is a [`WorkflowItem::Hints`], then a reference to the inner + /// [`WorkflowHintsSection`] is returned wrapped in [`Some`]. /// * Else, [`None`] is returned. pub fn as_hints_section(&self) -> Option<&WorkflowHintsSection> { match self { diff --git a/wdl-format/src/token.rs index 237065763..b9f6dff69 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -73,7 +73,6 @@ impl<T: Token> IntoIterator for TokenStream<T> { pub enum Comment { /// A comment on it's own line. Preceding(String), - /// A comment on the same line as the code preceding it. Inline(String), } @@ -94,5 +93,5 @@ pub enum LineSpacingPolicy { BeforeComments, /// Blank lines are always allowed.
#[default] - Yes, + Always, } diff --git a/wdl-format/src/token/post.rs index d5923bf95..345074e6f 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -153,7 +153,7 @@ impl Postprocessor { } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => match self.line_spacing_policy { - LineSpacingPolicy::Yes => { + LineSpacingPolicy::Always => { self.blank_line(stream); } LineSpacingPolicy::BeforeComments => { diff --git a/wdl-format/src/token/pre.rs index cf04c49ef..19fb52223 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -138,7 +138,7 @@ impl TokenStream<PreToken> { /// Inserts a blank lines allowed context change. pub fn blank_lines_allowed(&mut self) { self.0 - .push(PreToken::LineSpacingPolicy(LineSpacingPolicy::Yes)); + .push(PreToken::LineSpacingPolicy(LineSpacingPolicy::Always)); } /// Inserts a blank lines allowed between comments context change. diff --git a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl index 147620ed0..589c3137b 100644 --- a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl +++ b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl @@ -1,5 +1,5 @@ ## # Header -# regular comment will be left as is +# regular comment #@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing #@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput @@ -13,7 +13,7 @@ struct AStruct { task a_task { meta - # Here is a comment between `meta` and the parenthesis. + # Here is a comment between `meta` and the open brace. { # Here is a comment within `meta`. an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -36,7 +36,7 @@ task a_task { } parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. + # Here is a comment between `parameter_meta` and the open brace. { # Here is a comment within `parameter_meta`. an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -59,7 +59,7 @@ task a_task { } input - # Here is a comment before the input. + # Here is a comment between `input` and the open brace. { Object an_object String a_string @@ -73,7 +73,7 @@ task a_task { command <<< >>> output - # Here is a comment before the output. + # Here is a comment between `output` and the open brace. { Object some_other_object = { } @@ -88,7 +88,7 @@ task a_task { } requirements - # This is a comment before the requirements. + # This is a comment between `requirements` and the open brace. { container: "ubuntu:latest" } @@ -98,11 +98,11 @@ task a_task { } } -## These double-pound-sign comments -## should be converted to single-pound-sign comments. +## These are double-pound-sign comments. +## blah blah blah. workflow hello { meta - # Here is a comment between `meta` and the parenthesis. + # Here is a comment between `meta` and the open brace. { # Here is a comment within `meta`. an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -125,7 +125,7 @@ workflow hello { } parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. + # Here is a comment between `parameter_meta` and the open brace. { # Here is a comment within `parameter_meta`. an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -137,7 +137,7 @@ workflow hello { true, -42, "hello, world", - ] ## This should be converted to a single-pound-sign comment.
+ ] ## This is a double-pound-sign comment at the end of the line. an_object: { subkey_one: "a", subkey_two: 73, diff --git a/wdl-format/tests/format/clays_complex_script/source.wdl b/wdl-format/tests/format/clays_complex_script/source.wdl index 81faa4fa6..b3b78ba19 100644 --- a/wdl-format/tests/format/clays_complex_script/source.wdl +++ b/wdl-format/tests/format/clays_complex_script/source.wdl @@ -1,5 +1,5 @@ ## # Header -# regular comment will be left as is +# regular comment #@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing #@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput @@ -13,7 +13,7 @@ struct AStruct { task a_task { meta - # Here is a comment between `meta` and the parenthesis. + # Here is a comment between `meta` and the open brace. { # Here is a comment within `meta`. an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -32,7 +32,7 @@ task a_task { } parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. + # Here is a comment between `parameter_meta` and the open brace. { # Here is a comment within `parameter_meta`. an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -51,7 +51,7 @@ task a_task { } input - # Here is a comment before the input. + # Here is a comment between `input` and the open brace. { Object an_object String a_string @@ -65,7 +65,7 @@ task a_task { command <<< >>> output - # Here is a comment before the output. + # Here is a comment between `output` and the open brace. { Object some_other_object = {} String some_other_string = "foo bar baz" @@ -78,7 +78,7 @@ task a_task { } requirements - # This is a comment before the requirements. + # This is a comment between `requirements` and the open brace. { container: "ubuntu:latest" } @@ -88,11 +88,11 @@ task a_task { } } -## These double-pound-sign comments -## should be converted to single-pound-sign comments. +## These are double-pound-sign comments. +## blah blah blah. workflow hello { meta - # Here is a comment between `meta` and the parenthesis. + # Here is a comment between `meta` and the open brace. { # Here is a comment within `meta`. an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -111,7 +111,7 @@ workflow hello { } parameter_meta - # Here is a comment between `parameter_meta` and the parenthesis. + # Here is a comment between `parameter_meta` and the open brace. { # Here is a comment within `parameter_meta`. an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" @@ -119,7 +119,7 @@ workflow hello { a_false: false an_integer: 42 a_float: -0.0e123 - an_array: [true, -42, "hello, world"] ## This should be converted to a single-pound-sign comment. + an_array: [true, -42, "hello, world"] ## This is a double-pound-sign comment at the end of the line. 
an_object: { subkey_one: "a", subkey_two: 73, From 35aed7e77dc8c6642e0c9b6f1ed86cc9d2894745 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 14:57:24 -0400 Subject: [PATCH 49/60] Update main.rs --- ci/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/src/main.rs b/ci/src/main.rs index d515a8cfc..206cd11de 100644 --- a/ci/src/main.rs +++ b/ci/src/main.rs @@ -54,6 +54,7 @@ const SORTED_CRATES_TO_PUBLISH: &[&str] = &[ "wdl-grammar", "wdl-ast", "wdl-lint", + "wdl-format", "wdl-analysis", "wdl-lsp", "wdl", From 31bd09a070141e5fbd3ac58bc3958a6ab0480621 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 15:21:44 -0400 Subject: [PATCH 50/60] Update Gauntlet.toml --- Gauntlet.toml | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/Gauntlet.toml b/Gauntlet.toml index 3ffb659b2..d515a62d6 100644 --- a/Gauntlet.toml +++ b/Gauntlet.toml @@ -1406,16 +1406,31 @@ document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:457:40: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L457" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:457:49: error: cannot coerce type `Array[Int]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L457" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:485:43: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L485" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:485:52: error: cannot coerce type `Array[Int]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L485" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:772:43: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L772" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:772:52: error: cannot coerce type `Array[Int]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L772" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:77:28: error: type mismatch: expected type `String`, but found type `String?`" @@ -1426,16 +1441,31 @@ document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:789:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L789" +[[diagnostics]] +document = 
"broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:789:49: error: cannot coerce type `Array[String]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L789" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:790:42: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L790" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:790:51: error: cannot coerce type `Array[Int]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L790" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" message = "BenchmarkSVs.wdl:890:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L890" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkSVs/BenchmarkSVs.wdl" +message = "BenchmarkSVs.wdl:890:49: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkSVs/BenchmarkSVs.wdl/#L890" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkSVs/CleanSVs.wdl" message = "CleanSVs.wdl:23:29: error: type mismatch: expected type `String`, but found type `String?`" @@ -1511,21 +1541,41 @@ document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:613:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L613" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" +message = "BenchmarkVCFs.wdl:613:49: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L613" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:621:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L621" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" +message = "BenchmarkVCFs.wdl:621:49: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L621" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:626:36: error: a placeholder cannot have more than one option" permalink = 
"https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L626" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" +message = "BenchmarkVCFs.wdl:626:49: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L626" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:631:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L631" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" +message = "BenchmarkVCFs.wdl:631:49: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/BenchmarkVCFs/BenchmarkVCFs.wdl/#L631" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/BenchmarkVCFs/BenchmarkVCFs.wdl" message = "BenchmarkVCFs.wdl:91:54: error: type mismatch: argument to function `select_first` expects type `Array[X]` where `X`: any optional type, but found type `Array[File]`" @@ -2036,11 +2086,21 @@ document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIn message = "ComputeIntervalBamStats.wdl:223:37: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L223" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" +message = "ComputeIntervalBamStats.wdl:223:50: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L223" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" message = "ComputeIntervalBamStats.wdl:270:37: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L270" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" +message = "ComputeIntervalBamStats.wdl:270:50: error: cannot coerce type `Array[File]` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl/#L270" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/IntervalFiles/ComputeIntervalBamStats.wdl" message = "ComputeIntervalBamStats.wdl:77:14: warning[UnusedInput]: unused input `ref_fasta`" @@ -2051,16 +2111,31 @@ document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.w message = "CreateIGVSession.wdl:51:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L51" 
+[[diagnostics]] +document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" +message = "CreateIGVSession.wdl:51:50: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L51" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" message = "CreateIGVSession.wdl:52:36: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L52" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" +message = "CreateIGVSession.wdl:52:50: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L52" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" message = "CreateIGVSession.wdl:53:46: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L53" +[[diagnostics]] +document = "broadinstitute/palantir-workflows:/Utilities/WDLs/CreateIGVSession.wdl" +message = "CreateIGVSession.wdl:53:60: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/palantir-workflows/blob/f8833a0000cd441a3cc4be2718876a1433cb21ef/Utilities/WDLs/CreateIGVSession.wdl/#L53" + [[diagnostics]] document = "broadinstitute/palantir-workflows:/Utilities/WDLs/DownsampleAndCollectCoverage.wdl" message = "DownsampleAndCollectCoverage.wdl:173:91: error: type mismatch: expected type `Float?`, but found type `String?`" @@ -2836,16 +2911,31 @@ document = "broadinstitute/warp:/tasks/broad/CopyFilesFromCloudToCloud.wdl" message = "CopyFilesFromCloudToCloud.wdl:71:10: error: type mismatch: expected type `Int` or type `Float`, but found type `String`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/CopyFilesFromCloudToCloud.wdl/#L71" +[[diagnostics]] +document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" +message = "Funcotator.wdl:163:118: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L163" + [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:163:92: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L163" +[[diagnostics]] +document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" +message = "Funcotator.wdl:164:118: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L164" + [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:164:89: error: a placeholder cannot have more than one option" permalink = 
"https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L164" +[[diagnostics]] +document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" +message = "Funcotator.wdl:165:121: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Funcotator.wdl/#L165" + [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Funcotator.wdl" message = "Funcotator.wdl:165:91: error: a placeholder cannot have more than one option" @@ -3041,6 +3131,11 @@ document = "broadinstitute/warp:/tasks/broad/Qc.wdl" message = "Qc.wdl:434:31: error: a placeholder cannot have more than one option" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Qc.wdl/#L434" +[[diagnostics]] +document = "broadinstitute/warp:/tasks/broad/Qc.wdl" +message = "Qc.wdl:434:46: error: cannot coerce type `Array[String]?` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/tasks/broad/Qc.wdl/#L434" + [[diagnostics]] document = "broadinstitute/warp:/tasks/broad/Qc.wdl" message = "Qc.wdl:436:46: error: a placeholder cannot have more than one option" @@ -3416,6 +3511,11 @@ document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" message = "VerifyMetrics.wdl:73:11: warning[UnusedInput]: unused input `dependency_input`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyMetrics.wdl/#L73" +[[diagnostics]] +document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" +message = "VerifyMetrics.wdl:87:117: error: cannot coerce type `Array[String]` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyMetrics.wdl/#L87" + [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyMetrics.wdl" message = "VerifyMetrics.wdl:87:89: error: a placeholder cannot have more than one option" @@ -3696,6 +3796,11 @@ document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsJointGenotypin message = "VerifyUltimaGenomicsJointGenotyping.wdl:48:8: warning[UnusedCall]: unused call `CompareFingerprints`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsJointGenotyping.wdl/#L48" +[[diagnostics]] +document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" +message = "VerifyUltimaGenomicsWholeGenomeCramOnly.wdl:102:115: error: cannot coerce type `Array[String]` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl/#L102" + [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" message = "VerifyUltimaGenomicsWholeGenomeCramOnly.wdl:102:87: error: a placeholder cannot have more than one option" @@ -3736,6 +3841,11 @@ document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGer message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:161:11: warning[UnusedInput]: unused input `dependency_input`" permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl/#L161" +[[diagnostics]] +document = 
"broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" +message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:174:115: error: cannot coerce type `Array[String]` to `String`" +permalink = "https://github.com/broadinstitute/warp/blob/ec91e512235419b267e295583db3ec2594fcd717/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl/#L174" + [[diagnostics]] document = "broadinstitute/warp:/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" message = "VerifyUltimaGenomicsWholeGenomeGermline.wdl:174:87: error: a placeholder cannot have more than one option" From c8f377055d3072ff702a3a04d88a268b4030c443 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 15:31:26 -0400 Subject: [PATCH 51/60] tests: move registry.rs to tests/ --- wdl-ast/Cargo.toml | 4 + wdl-ast/src/lib.rs | 2 - wdl-ast/{src => tests}/registry.rs | 149 ++++++++++++++--------------- 3 files changed, 76 insertions(+), 79 deletions(-) rename wdl-ast/{src => tests}/registry.rs (78%) diff --git a/wdl-ast/Cargo.toml b/wdl-ast/Cargo.toml index fe6698db6..4b32b6e72 100644 --- a/wdl-ast/Cargo.toml +++ b/wdl-ast/Cargo.toml @@ -35,3 +35,7 @@ workspace = true name = "validation" required-features = ["codespan"] harness = false + +[[test]] +name = "registry" +harness = false diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs index 0e600a6c9..dec52a7d7 100644 --- a/wdl-ast/src/lib.rs +++ b/wdl-ast/src/lib.rs @@ -63,8 +63,6 @@ pub use wdl_grammar::version; pub mod v1; mod element; -#[cfg(test)] -mod registry; mod validation; mod visitor; diff --git a/wdl-ast/src/registry.rs b/wdl-ast/tests/registry.rs similarity index 78% rename from wdl-ast/src/registry.rs rename to wdl-ast/tests/registry.rs index 5445361f1..d72a95bfd 100644 --- a/wdl-ast/src/registry.rs +++ b/wdl-ast/tests/registry.rs @@ -21,15 +21,15 @@ use std::sync::LazyLock; use wdl_grammar::ALL_SYNTAX_KIND; use wdl_grammar::WorkflowDescriptionLanguage; -use crate::AstNode; -use crate::AstToken; -use crate::Comment; -use crate::Ident; -use crate::SyntaxKind; -use crate::Version; -use crate::VersionStatement; -use crate::Whitespace; -use crate::v1; +use wdl_ast::AstNode; +use wdl_ast::AstToken; +use wdl_ast::Comment; +use wdl_ast::Ident; +use wdl_ast::SyntaxKind; +use wdl_ast::Version; +use wdl_ast::VersionStatement; +use wdl_ast::Whitespace; +use wdl_ast::v1; /// A private module for sealed traits. /// @@ -298,84 +298,79 @@ impl AstTokenRegistrant for T { } } -mod tests { - use super::*; - - /// This test ensures there is a one-to-one mapping between CST elements - /// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement - /// the [`AstNode`] trait or the [`AstToken`] trait). - /// - /// The importance of this is described at the top of the module. - #[test] - fn ensure_one_to_one() { - let mut missing = Vec::new(); - let mut multiple = Vec::new(); +/// This test ensures there is a one-to-one mapping between CST elements +/// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement +/// the [`AstNode`] trait or the [`AstToken`] trait). +/// +/// The importance of this is described at the top of the module. +fn main() { + let mut missing = Vec::new(); + let mut multiple = Vec::new(); - let inverse_registry = inverse(); + let inverse_registry = inverse(); - for kind in ALL_SYNTAX_KIND { - // NOTE: these are symbolic elements and should not be included in - // the analysis here. 
- if kind.is_symbolic() { - continue; - } + if kind.is_symbolic() { + continue; + } match inverse_registry.get(kind) { // SAFETY: because this is an inverse registry, only // [`SyntaxKind`]s with at least one registered implementing // type would be registered here. Thus, by design of the // `inverse()` method, this will never occur. Some(values) if values.is_empty() => { unreachable!("the inverse registry should never contain an empty array") } Some(values) if values.len() > 1 => multiple.push((kind, values)), None => missing.push(kind), // NOTE: this is essentially only if the values exist and the // length is 1—in that case, there is a one to one mapping, // which is what we would like the case to be. _ => {} } } if !missing.is_empty() { let mut missing = missing .into_iter() .map(|kind| format!("{:?}", kind)) .collect::<Vec<_>>(); missing.sort(); panic!( "detected `SyntaxKind`s without an associated `AstNode`/`AstToken` (n={}): {}", missing.len(), missing.join(", ") ) } if !multiple.is_empty() { multiple.sort(); let mut multiple = multiple .into_iter() .map(|(kind, types)| { let mut types = types.clone(); types.sort(); let mut result = format!("== {:?} ==", kind); for r#type in types { result.push_str("\n* "); result.push_str(r#type); } result }) .collect::<Vec<_>>(); multiple.sort(); panic!( "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s \ (n={}):\n\n{}", multiple.len(), multiple.join("\n\n") ) } } From a6ef511980590e947381657ab79eaff146bf09a9 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 15:33:10 -0400 Subject: [PATCH 52/60] Update registry.rs --- wdl-ast/tests/registry.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/wdl-ast/tests/registry.rs index d72a95bfd..e52b55c24 100644 --- a/wdl-ast/tests/registry.rs +++ b/wdl-ast/tests/registry.rs @@ -18,9 +18,6 @@ use
std::any::type_name; use std::collections::HashMap; use std::sync::LazyLock; -use wdl_grammar::ALL_SYNTAX_KIND; -use wdl_grammar::WorkflowDescriptionLanguage; - use wdl_ast::AstNode; use wdl_ast::AstToken; use wdl_ast::Comment; use wdl_ast::Ident; use wdl_ast::SyntaxKind; use wdl_ast::Version; use wdl_ast::VersionStatement; use wdl_ast::Whitespace; use wdl_ast::v1; +use wdl_grammar::ALL_SYNTAX_KIND; +use wdl_grammar::WorkflowDescriptionLanguage; /// A private module for sealed traits. /// @@ -367,8 +366,7 @@ fn main() { multiple.sort(); panic!( - "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s \ - (n={}):\n\n{}", + "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s (n={}):\n\n{}", multiple.len(), multiple.join("\n\n") ) } From e77afde5036ec8753a11bf4a5059a1570f5fa3d1 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 15:37:46 -0400 Subject: [PATCH 53/60] tests: reconfigure --- wdl-ast/Cargo.toml | 4 ---- wdl-ast/tests/registry.rs | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/wdl-ast/Cargo.toml index 4b32b6e72..fe6698db6 100644 --- a/wdl-ast/Cargo.toml +++ b/wdl-ast/Cargo.toml @@ -35,7 +35,3 @@ workspace = true name = "validation" required-features = ["codespan"] harness = false - -[[test]] -name = "registry" -harness = false diff --git a/wdl-ast/tests/registry.rs index e52b55c24..49df23f82 100644 --- a/wdl-ast/tests/registry.rs +++ b/wdl-ast/tests/registry.rs @@ -302,7 +302,8 @@ impl AstTokenRegistrant for T { /// the [`AstNode`] trait or the [`AstToken`] trait). /// /// The importance of this is described at the top of the module. -fn main() { +#[test] +fn ensures_one_to_one() { let mut missing = Vec::new(); let mut multiple = Vec::new(); From 160dc8ba750956c60eabd2f687583030f262f57f Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 14 Oct 2024 16:26:52 -0400 Subject: [PATCH 54/60] feat: use config for indents --- wdl-format/src/lib.rs | 2 +- wdl-format/src/token.rs | 14 +------------- wdl-format/src/token/post.rs | 17 +++++++++++------ 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/wdl-format/src/lib.rs index faeea5672..545d5b018 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -201,7 +201,7 @@ impl Formatter { let mut result = String::new(); for token in self.to_stream(element) { - write!(result, "{token}")?; + write!(result, "{token}", token = token.display(self.config()))?; } Ok(result) diff --git a/wdl-format/src/token.rs index b9f6dff69..0add01a6b 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -3,13 +3,11 @@ mod post; mod pre; -use std::fmt::Display; - pub use post::*; pub use pre::*; /// Tokens that are streamable. -pub trait Token: Display + Eq + PartialEq {} +pub trait Token: Eq + PartialEq {} /// A stream of tokens. Tokens in this case are either [`PreToken`]s or /// [`PostToken`]s. Note that, unless you are working on formatting @@ -24,16 +22,6 @@ impl<T: Token> Default for TokenStream<T> { } } -impl<T: Token> std::fmt::Display for TokenStream<T> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for value in &self.0 { - write!(f, "{value}")?; - } - - Ok(()) - } -} - impl<T: Token> TokenStream<T> { /// Pushes a token into the stream.
pub fn push(&mut self, token: T) { diff --git a/wdl-format/src/token/post.rs index 345074e6f..d5c363aca 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -13,6 +13,7 @@ use crate::SPACE; use crate::Token; use crate::TokenStream; use crate::Trivia; +use crate::config::Indent; /// A postprocessed token. #[derive(Eq, PartialEq)] @@ -41,13 +42,17 @@ impl std::fmt::Debug for PostToken { } } -impl std::fmt::Display for PostToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl PostToken { + /// Returns a displayable version of the token. + pub fn display<'a>(&'a self, config: &'a crate::Config) -> Box<dyn std::fmt::Display + 'a> { match self { - PostToken::Space => write!(f, "{SPACE}"), - PostToken::Newline => write!(f, "{NEWLINE}"), - PostToken::Indent => write!(f, "    "), // TODO(af): Make this configurable. - PostToken::Literal(value) => write!(f, "{value}"), + Self::Space => Box::new(SPACE), + Self::Newline => Box::new(NEWLINE), + Self::Indent => Box::new(match config.indent() { + Indent::Spaces(n) => " ".repeat((n).into()), + Indent::Tabs(n) => "\t".repeat((n).into()), + }), + Self::Literal(value) => Box::new(value), } } } From af1462aff536c2ad991bce48de040436ab782096 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Tue, 15 Oct 2024 09:34:46 -0400 Subject: [PATCH 55/60] revise: code review feedback --- wdl-format/src/token/post.rs | 6 +- wdl-grammar/src/tree.rs | 342 ++++++++++++++++------------------- 2 files changed, 156 insertions(+), 192 deletions(-) diff --git a/wdl-format/src/token/post.rs index d5c363aca..eaf0cb014 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -153,7 +153,7 @@ impl Postprocessor { { stream.0.pop(); } - stream.push(PostToken::Literal(value.to_owned())); + stream.push(PostToken::Literal(value)); self.position = LinePosition::MiddleOfLine; } PreToken::Trivia(trivia) => match trivia { @@ -177,7 +177,7 @@ impl Postprocessor { self.interrupted = true; } self.end_line(stream); - stream.push(PostToken::Literal(value.to_owned())); + stream.push(PostToken::Literal(value)); self.position = LinePosition::MiddleOfLine; } Comment::Inline(value) => { @@ -190,7 +190,7 @@ impl Postprocessor { self.trim_last_line(stream); stream.push(PostToken::Space); stream.push(PostToken::Space); - stream.push(PostToken::Literal(value.to_owned())); + stream.push(PostToken::Literal(value)); } } self.end_line(stream); diff --git a/wdl-grammar/src/tree.rs index 103c7bcad..abd1dfca2 100644 --- a/wdl-grammar/src/tree.rs +++ b/wdl-grammar/src/tree.rs @@ -5,7 +5,9 @@ pub mod dive; use std::borrow::Cow; use std::collections::VecDeque; use std::fmt; +use std::iter; +use itertools::Either; use rowan::Direction; use rowan::GreenNodeBuilder; use rowan::GreenNodeData; @@ -716,98 +718,20 @@ impl fmt::Debug for SyntaxTree { } } -/// Gathers substantial trivia (comments and blank lines) from a -/// [`SyntaxToken`]. -/// -/// Whitespace is only considered substantial if it contains more than one -/// newline. Comments are always considered substantial. -fn gather_substantial_trivia( - source: &SyntaxToken, - direction: Direction, - break_on_newline: bool, -) -> Box<[SyntaxToken]> { - /// Adds the token to the currently collecting buffer in the right place - /// depending in the direction we are traversing.
- fn push_results( - token: SyntaxToken, - results: &mut VecDeque<SyntaxToken>, - direction: &Direction, - ) { - match direction { - Direction::Next => results.push_back(token), - Direction::Prev => results.push_front(token), - } - } - - let mut results = VecDeque::new(); - let mut cur = match direction { - Direction::Next => source.next_token(), - Direction::Prev => source.prev_token(), - }; - while let Some(t) = cur { - if !t.kind().is_trivia() { - break; - } - - match t.kind() { - SyntaxKind::Comment => { - // Check if t is a comment on its own line. - // If direction is 'Next' then we already know that the - // comment is on its own line. - if direction == Direction::Prev { - if let Some(prev) = t.prev_token() { - if prev.kind() == SyntaxKind::Whitespace { - let newlines = prev.text().chars().filter(|c| *c == '\n').count(); - - // If there are newlines in 'prev' then we know - // that the comment is on its own line. - // The comment may still be on its own line if - // 'prev' does not have newlines and nothing comes - // before 'prev'. - if newlines == 0 && prev.prev_token().is_some() { - break; - } - } else { - // There is something else on this line before the comment. - break; - } - } - } - push_results(t.clone(), &mut results, &direction); - } - SyntaxKind::Whitespace => { - let newlines = t.text().chars().filter(|c| *c == '\n').count(); - - if break_on_newline && newlines > 0 { - break; - } - - if newlines > 1 { - push_results(t.clone(), &mut results, &direction); - } - } - // SAFETY: we just filtered out any non-comment and - // non-whitespace nodes above, so this should never occur. - _ => unreachable!(), - } - cur = match direction { - Direction::Next => t.next_token(), - Direction::Prev => t.prev_token(), - }; - } - - // NOTE: most of the time, this conversion will be O(1). Occassionally - // it will be O(n). No allocations will ever be done. Thus, the - // ammortized cost of this is quite cheap. - Vec::from(results).into_boxed_slice() -} - /// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and /// [`SyntaxElement`]s. pub trait SyntaxExt { /// Returns whether `self` matches the provided element. fn matches(&self, other: &SyntaxElement) -> bool; + /// Gets the parent of the element. + /// + /// Returns `None` for the root node. + fn parent(&self) -> Option<SyntaxNode>; + + /// Gets the child index of the element. + fn index(&self) -> usize; + /// Gets the siblings with tokens. /// /// **NOTE:** this needed because Rowan does not encapsulate this @@ -815,42 +739,26 @@ pub trait SyntaxExt { /// provided by this extension trait can just be provided, which simplifies /// the code. Generally speaking, this should just defer to the underlying /// `siblings_with_tokens` method for each type. - fn siblings_with_tokens(&self, direction: Direction) - -> Box<dyn Iterator<Item = SyntaxElement>>; + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement>; /// Returns all of the siblings _before_ the current element. /// /// The siblings are returned in the order they were parsed. - fn preceding_siblings(&self) -> Box<[SyntaxElement]> { - let mut results = VecDeque::new(); - - self.siblings_with_tokens(Direction::Prev) - // NOTE: this `skip_while` is necessary because - // `siblings_with_tokens` returns the current node. - .skip_while(|e| self.matches(e)) - .for_each(|e| results.push_front(e)); - - // NOTE: most of the time, this conversion will be O(1). Occassionally - // it will be O(n). No allocations will ever be done. Thus, the - // ammortized cost of this is quite cheap.
- Vec::from(results).into_boxed_slice() + fn preceding_siblings(&self) -> impl Iterator<Item = SyntaxElement> { + let index = self.index(); + self.parent() + .into_iter() + .flat_map(move |p| p.children_with_tokens().take(index)) } /// Returns all of the siblings _after_ the current element. /// /// The siblings are returned in the order they were parsed. - fn succeeding_siblings(&self) -> Box<[SyntaxElement]> { - let mut results = Vec::new(); - + fn succeeding_siblings(&self) -> impl Iterator<Item = SyntaxElement> { self.siblings_with_tokens(Direction::Next) - // NOTE: this `skip_while` is necessary because - // `siblings_with_tokens` returns the current node. - .skip_while(|e| self.matches(e)) - .for_each(|e| results.push(e)); - - // NOTE: this should always be O(1) and never require any additional - // allocations. - results.into_boxed_slice() + // NOTE: this `skip` is necessary because `siblings_with_tokens` returns the current + // node. + .skip(1) } /// Gets all elements that are adjacent to a particular element (not /// including the element itself). This function returns either side in /// reverse direction. /// /// The siblings are returned in the order they were parsed. - fn adjacent(&self) -> Box<[SyntaxElement]> { - let mut results = Vec::from(self.preceding_siblings()); - results.extend(self.succeeding_siblings().iter().cloned()); - - // NOTE: this should always be O(1) and never require any additional - // allocations. - results.into_boxed_slice() + fn adjacent(&self) -> impl Iterator<Item = SyntaxElement> { + self.preceding_siblings().chain(self.succeeding_siblings()) } } @@ -873,11 +776,16 @@ impl SyntaxExt for SyntaxNode { other.as_node().map(|n| n == self).unwrap_or(false) } - fn siblings_with_tokens( - &self, - direction: Direction, - ) -> Box<dyn Iterator<Item = SyntaxElement>> { - Box::new(self.siblings_with_tokens(direction)) + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { + self.siblings_with_tokens(direction) + } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() } } @@ -886,11 +794,16 @@ impl SyntaxExt for SyntaxToken { other.as_token().map(|n| n == self).unwrap_or(false) } - fn siblings_with_tokens( - &self, - direction: Direction, - ) -> Box<dyn Iterator<Item = SyntaxElement>> { - Box::new(self.siblings_with_tokens(direction)) + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { + self.siblings_with_tokens(direction) + } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() } } @@ -899,65 +812,119 @@ impl SyntaxExt for SyntaxElement { self == other } - fn siblings_with_tokens( - &self, - direction: Direction, - ) -> Box<dyn Iterator<Item = SyntaxElement>> { + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { match self { - SyntaxElement::Node(node) => Box::new(node.siblings_with_tokens(direction)), - SyntaxElement::Token(token) => Box::new(token.siblings_with_tokens(direction)), + SyntaxElement::Node(node) => Either::Left(node.siblings_with_tokens(direction)), + SyntaxElement::Token(token) => Either::Right(token.siblings_with_tokens(direction)), } } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() + } } /// An extension trait for [`SyntaxToken`]s. pub trait SyntaxTokenExt { /// Gets all of the substantial preceding trivia for an element. - fn preceding_trivia(&self) -> Box<[SyntaxToken]> - where - Self: Sized, - Self: SyntaxExt; + fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken>; /// Gets all of the substantial succeeding trivia for an element.
@@ -899,65 +812,119 @@ impl SyntaxExt for SyntaxElement {
         self == other
     }
 
-    fn siblings_with_tokens(
-        &self,
-        direction: Direction,
-    ) -> Box<dyn Iterator<Item = SyntaxElement>> {
+    fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> {
         match self {
-            SyntaxElement::Node(node) => Box::new(node.siblings_with_tokens(direction)),
-            SyntaxElement::Token(token) => Box::new(token.siblings_with_tokens(direction)),
+            SyntaxElement::Node(node) => Either::Left(node.siblings_with_tokens(direction)),
+            SyntaxElement::Token(token) => Either::Right(token.siblings_with_tokens(direction)),
         }
     }
+
+    fn parent(&self) -> Option<SyntaxNode> {
+        self.parent()
+    }
+
+    fn index(&self) -> usize {
+        self.index()
+    }
 }
 
 /// An extension trait for [`SyntaxToken`]s.
 pub trait SyntaxTokenExt {
     /// Gets all of the substantial preceding trivia for an element.
-    fn preceding_trivia(&self) -> Box<[SyntaxToken]>
-    where
-        Self: Sized,
-        Self: SyntaxExt;
+    fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken>;
 
     /// Gets all of the substantial succeeding trivia for an element.
-    fn succeeding_trivia(&self) -> Box<[SyntaxToken]>
-    where
-        Self: Sized,
-        Self: SyntaxExt;
+    fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken>;
 
     /// Get any inline comment directly following an element on the
     /// same line.
-    fn inline_comment(&self) -> Option<SyntaxToken>
-    where
-        Self: Sized,
-        Self: SyntaxExt;
+    fn inline_comment(&self) -> Option<SyntaxToken>;
 }
 
 impl SyntaxTokenExt for SyntaxToken {
-    fn preceding_trivia(&self) -> Box<[SyntaxToken]>
-    where
-        Self: Sized,
-        Self: SyntaxExt,
-    {
-        gather_substantial_trivia(self, Direction::Prev, false)
+    fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken> {
+        let mut tokens = VecDeque::new();
+        let mut cur = self.prev_token();
+        while let Some(token) = cur {
+            cur = token.prev_token();
+            // Stop at first non-trivia
+            if !token.kind().is_trivia() {
+                break;
+            }
+            // Stop if a comment is not on its own line
+            if token.kind() == SyntaxKind::Comment {
+                if let Some(prev) = token.prev_token() {
+                    if prev.kind() == SyntaxKind::Whitespace {
+                        let has_newlines = prev.text().chars().any(|c| c == '\n');
+                        // If there are newlines in 'prev' then we know
+                        // that the comment is on its own line.
+                        // The comment may still be on its own line if
+                        // 'prev' does not have newlines and nothing comes
+                        // before 'prev'.
+                        if !has_newlines && prev.prev_token().is_some() {
+                            break;
+                        }
+                    } else {
+                        // There is something else on this line before the comment.
+                        break;
+                    }
+                }
+            }
+            // Filter out whitespace that is not substantial
+            match token.kind() {
+                SyntaxKind::Whitespace
+                    if token.text().chars().filter(|c| *c == '\n').count() > 1 =>
+                {
+                    tokens.push_front(token);
+                }
+                SyntaxKind::Comment => {
+                    tokens.push_front(token);
+                }
+                _ => {}
+            }
+        }
+        tokens.into_iter()
     }
 
-    fn succeeding_trivia(&self) -> Box<[SyntaxToken]>
-    where
-        Self: Sized,
-        Self: SyntaxExt,
-    {
-        gather_substantial_trivia(self, Direction::Next, false)
+    fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken> {
+        let mut next = self.next_token();
+        iter::from_fn(move || {
+            let cur = next.clone()?;
+            next = cur.next_token();
+            Some(cur)
+        })
+        .take_while(|t| {
+            // Stop at first non-trivia
+            t.kind().is_trivia()
+        })
+        .filter(|t| {
+            // Filter out whitespace that is not substantial
+            if t.kind() == SyntaxKind::Whitespace {
+                return t.text().chars().filter(|c| *c == '\n').count() > 1;
+            }
+            true
+        })
     }
 
-    fn inline_comment(&self) -> Option<SyntaxToken>
-    where
-        Self: Sized,
-        Self: SyntaxExt,
-    {
-        gather_substantial_trivia(self, Direction::Next, true)
-            // NOTE: at most, there can be one contiguous comment on a line.
-            .first()
-            .cloned()
+    fn inline_comment(&self) -> Option<SyntaxToken> {
+        let mut next = self.next_token();
+        iter::from_fn(move || {
+            let cur = next.clone()?;
+            next = cur.next_token();
+            Some(cur)
+        })
+        .take_while(|t| {
+            // Stop at non-trivia
            if !t.kind().is_trivia() {
                return false;
            }
+            // Stop on first whitespace containing a newline
+            if t.kind() == SyntaxKind::Whitespace {
+                return !t.text().chars().any(|c| c == '\n');
+            }
+            true
+        })
+        .find(|t| t.kind() == SyntaxKind::Comment)
     }
 }
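The rewritten `succeeding_trivia` and `inline_comment` above share a lazy token walk built from `iter::from_fn`, so callers only pay for the tokens they actually consume. A self-contained sketch of the same shape, using strings as stand-ins for `SyntaxToken`s and a simplified trivia predicate:

```rust
use std::iter;

fn main() {
    // Stand-ins for a token chain; "next token" is simply index + 1.
    let tokens = ["   ", "# inline", "\n", "# next line"];

    let mut next = Some(0usize);
    let walker = iter::from_fn(move || {
        let cur = next?;
        next = (cur + 1 < tokens.len()).then(|| cur + 1);
        Some(tokens[cur])
    });

    // Mirrors `inline_comment`: stop at the first newline-bearing
    // token, then keep the first comment encountered before it.
    let inline = walker
        .take_while(|t| !t.contains('\n'))
        .find(|t| t.starts_with('#'));
    assert_eq!(inline, Some("# inline"));
}
```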
@@ -991,21 +958,18 @@ workflow foo {} # This should not be collected.
         let workflow = tree.root().last_child().unwrap();
         assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
 
-        let trivia = workflow.first_token().unwrap().preceding_trivia();
-        let mut trivia_iter = trivia.iter();
-        assert_eq!(trivia_iter.next().unwrap().text(), "\n\n");
-        assert_eq!(trivia_iter.next().unwrap().text(), "# Some");
-        assert_eq!(trivia_iter.next().unwrap().text(), "# comments");
-        assert_eq!(trivia_iter.next().unwrap().text(), "# are");
-        assert_eq!(trivia_iter.next().unwrap().text(), "# long");
-        assert_eq!(trivia_iter.next().unwrap().text(), "\n \n");
-        assert_eq!(trivia_iter.next().unwrap().text(), "# Others are short");
-        assert_eq!(trivia_iter.next().unwrap().text(), "\n\n");
-        assert_eq!(
-            trivia_iter.next().unwrap().text(),
-            "# and, yet another"
-        );
-        assert!(trivia_iter.next().is_none());
+        let token = workflow.first_token().unwrap();
+        let mut trivia = token.preceding_trivia();
+        assert_eq!(trivia.next().unwrap().text(), "\n\n");
+        assert_eq!(trivia.next().unwrap().text(), "# Some");
+        assert_eq!(trivia.next().unwrap().text(), "# comments");
+        assert_eq!(trivia.next().unwrap().text(), "# are");
+        assert_eq!(trivia.next().unwrap().text(), "# long");
+        assert_eq!(trivia.next().unwrap().text(), "\n \n");
+        assert_eq!(trivia.next().unwrap().text(), "# Others are short");
+        assert_eq!(trivia.next().unwrap().text(), "\n\n");
+        assert_eq!(trivia.next().unwrap().text(), "# and, yet another");
+        assert!(trivia.next().is_none());
     }
 
     #[test]
@@ -1026,18 +990,18 @@ workflow foo {} # Here is a comment that should be collected.
         let workflow = tree.root().last_child().unwrap();
         assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
 
-        let trivia = workflow.last_token().unwrap().succeeding_trivia();
-        let mut trivia_iter = trivia.iter();
+        let token = workflow.last_token().unwrap();
+        let mut trivia = token.succeeding_trivia();
         assert_eq!(
-            trivia_iter.next().unwrap().text(),
+            trivia.next().unwrap().text(),
             "# Here is a comment that should be collected."
         );
-        assert_eq!(trivia_iter.next().unwrap().text(), "\n\n");
+        assert_eq!(trivia.next().unwrap().text(), "\n\n");
         assert_eq!(
-            trivia_iter.next().unwrap().text(),
+            trivia.next().unwrap().text(),
             "# This comment should be included too."
         );
-        assert!(trivia_iter.next().is_none());
+        assert!(trivia.next().is_none());
     }
 
     #[test]

From d6aff8bc1e0d6b897655a649b3a288d16ced0624 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Tue, 15 Oct 2024 09:39:44 -0400
Subject: [PATCH 56/60] Apply suggestions from code review

Co-authored-by: Peter Huene
---
 wdl-format/src/token.rs | 2 +-
 wdl-grammar/src/tree.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
index 0add01a6b..17f547026 100644
--- a/wdl-format/src/token.rs
+++ b/wdl-format/src/token.rs
@@ -59,7 +59,7 @@ impl<T: Token> IntoIterator for TokenStream<T> {
 /// The kind of comment.
 #[derive(Debug, Eq, PartialEq)]
 pub enum Comment {
-    /// A comment on it's own line.
+    /// A comment on its own line.
     Preceding(String),
     /// A comment on the same line as the code preceding it.
     Inline(String),
diff --git a/wdl-grammar/src/tree.rs b/wdl-grammar/src/tree.rs
index abd1dfca2..562540577 100644
--- a/wdl-grammar/src/tree.rs
+++ b/wdl-grammar/src/tree.rs
@@ -734,7 +734,7 @@ pub trait SyntaxExt {
 
     /// Gets the siblings with tokens.
     ///
-    /// **NOTE:** this needed because Rowan does not encapsulate this
+    /// **NOTE:** this is needed because Rowan does not encapsulate this
     /// functionality in a trait. Once wrapped here, most of the functions
     /// provided by this extension trait can just be provided, which simplifies
     /// the code. Generally speaking, this should just defer to the underlying
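Stepping back from the typo fixes above: the own-line check that `preceding_trivia` performs on a comment's predecessor reduces to a small decision table. A hedged restatement in standalone form (not the crate's code; tokens are modeled as plain strings):

```rust
/// A comment owns its line when the token before it is whitespace that
/// contains a newline, when that whitespace opens the file, or when the
/// comment itself is the first token.
fn comment_on_own_line(before: Option<&str>, something_precedes_it: bool) -> bool {
    match before {
        Some(ws) if ws.chars().all(char::is_whitespace) => {
            ws.contains('\n') || !something_precedes_it
        }
        Some(_) => false, // code sits on the same line before the comment
        None => true,     // the comment is the first token in the file
    }
}

fn main() {
    assert!(comment_on_own_line(Some("\n    "), true)); // indented, own line
    assert!(comment_on_own_line(None, false)); // start of file
    assert!(!comment_on_own_line(Some(" "), true)); // trailing (inline) comment
}
```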
From 84090d9e62a41a343416adb7d4154c1414d87fa1 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Tue, 15 Oct 2024 10:15:57 -0400
Subject: [PATCH 57/60] chore: code review feedback

---
 wdl-ast/src/lib.rs              |  3 +--
 wdl-format/src/config/indent.rs |  6 ++----
 wdl-format/src/token.rs         |  8 +++++++-
 wdl-format/src/token/post.rs    | 23 ++++++++++++-----------
 wdl-format/src/token/pre.rs     |  7 ++++++-
 5 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs
index dec52a7d7..5618ebe95 100644
--- a/wdl-ast/src/lib.rs
+++ b/wdl-ast/src/lib.rs
@@ -43,7 +43,6 @@ pub use rowan::Direction;
 pub use rowan::ast::AstChildren;
 pub use rowan::ast::AstNode;
 pub use rowan::ast::support;
-use v1::VersionKeyword;
 pub use wdl_grammar::Diagnostic;
 pub use wdl_grammar::Label;
 pub use wdl_grammar::Severity;
@@ -403,7 +402,7 @@ impl VersionStatement {
     }
 
     /// Gets the version keyword of the version statement.
-    pub fn keyword(&self) -> VersionKeyword {
+    pub fn keyword(&self) -> v1::VersionKeyword {
         token(&self.0).expect("version statement must have a version keyword")
     }
 }
diff --git a/wdl-format/src/config/indent.rs b/wdl-format/src/config/indent.rs
index 2b109adac..549f96f7f 100644
--- a/wdl-format/src/config/indent.rs
+++ b/wdl-format/src/config/indent.rs
@@ -1,11 +1,9 @@
 //! Indentation within formatting configuration.
 
 use std::num::NonZeroUsize;
-use std::sync::LazyLock;
 
 /// The default indentation.
-pub static DEFAULT_INDENT: LazyLock<Indent> =
-    LazyLock::new(|| Indent::Spaces(NonZeroUsize::new(4).unwrap()));
+pub const DEFAULT_INDENT: Indent = Indent::Spaces(unsafe { NonZeroUsize::new_unchecked(4) });
 
 /// An indentation level.
 #[derive(Clone, Copy, Debug)]
@@ -19,6 +17,6 @@ pub enum Indent {
 
 impl Default for Indent {
     fn default() -> Self {
-        *DEFAULT_INDENT
+        DEFAULT_INDENT
     }
 }
diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
index 17f547026..cad0364f7 100644
--- a/wdl-format/src/token.rs
+++ b/wdl-format/src/token.rs
@@ -6,8 +6,14 @@ mod pre;
 pub use post::*;
 pub use pre::*;
 
+use crate::Config;
+use std::fmt::Display;
+
 /// Tokens that are streamable.
-pub trait Token: Eq + PartialEq {}
+pub trait Token: Eq + PartialEq {
+    /// Returns a displayable version of the token.
+    fn display(&self, config: &Config) -> impl Display;
+}
 
 /// A stream of tokens. Tokens in this case are either [`PreToken`]s or
 /// [`PostToken`]s. Note that, unless you are working on formatting
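The `Token` trait change above is the pivotal one in this commit: rendering moves behind a config-aware `display` method that returns `impl Display` (return-position `impl Trait` in traits, stable since Rust 1.75), so a token cannot be printed without a `Config` in hand. A minimal sketch of the pattern with illustrative names, not the crate's actual types:

```rust
use std::fmt::Display;

/// Illustrative stand-in for the formatter configuration.
struct Config {
    indent_width: usize,
}

/// Rendering takes the config as an argument, which a plain
/// `Display` impl on the token type could not see.
trait Render {
    fn display(&self, config: &Config) -> impl Display;
}

struct IndentToken;

impl Render for IndentToken {
    fn display(&self, config: &Config) -> impl Display {
        " ".repeat(config.indent_width)
    }
}

fn main() {
    let config = Config { indent_width: 4 };
    assert_eq!(IndentToken.display(&config).to_string(), "    ");
}
```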
diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
index eaf0cb014..caee2d21a 100644
--- a/wdl-format/src/token/post.rs
+++ b/wdl-format/src/token/post.rs
@@ -3,6 +3,8 @@
 //! Generally speaking, unless you are working with the internals of code
 //! formatting, you're not going to be working with these.
 
+use std::fmt::Display;
+
 use wdl_ast::SyntaxKind;
 
 use crate::Comment;
@@ -42,23 +44,22 @@ impl std::fmt::Debug for PostToken {
     }
 }
 
-impl PostToken {
+impl Token for PostToken {
     /// Returns a displayable version of the token.
-    pub fn display<'a>(&'a self, config: &'a crate::Config) -> Box<dyn Display + 'a> {
+    fn display(&self, config: &crate::Config) -> impl Display {
+        let indent = match config.indent(){
+            Indent::Spaces(n) => " ".repeat((n).into()),
+            Indent::Tabs(n) => "\t".repeat((n).into()),
+        };
         match self {
-            Self::Space => Box::new(SPACE),
-            Self::Newline => Box::new(NEWLINE),
-            Self::Indent => Box::new(match config.indent() {
-                Indent::Spaces(n) => " ".repeat((n).into()),
-                Indent::Tabs(n) => "\t".repeat((n).into()),
-            }),
-            Self::Literal(value) => Box::new(value),
+            Self::Space => SPACE.to_string(),
+            Self::Newline => NEWLINE.to_string(),
+            Self::Indent => indent,
+            Self::Literal(value) => value.to_string(),
         }
     }
 }
 
-impl Token for PostToken {}
-
 /// Current position in a line.
 #[derive(Default, Eq, PartialEq)]
 enum LinePosition {
diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs
index 19fb52223..154c9469d 100644
--- a/wdl-format/src/token/pre.rs
+++ b/wdl-format/src/token/pre.rs
@@ -94,7 +94,12 @@ impl std::fmt::Display for PreToken {
     }
 }
 
-impl Token for PreToken {}
+impl Token for PreToken {
+    /// Returns a displayable version of the token.
+    fn display(&self, _config: &crate::Config) -> impl std::fmt::Display {
+        format!("{}", self)
+    }
+}
 
 impl TokenStream<PreToken> {
     /// Inserts a blank line token to the stream if the stream does not already

From 68b70b84f38b6f25410425c47df67171808866ae Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Tue, 15 Oct 2024 11:06:54 -0400
Subject: [PATCH 58/60] chore: cargo fmt

---
 wdl-format/src/token.rs      | 3 ++-
 wdl-format/src/token/post.rs | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
index cad0364f7..8b57bc523 100644
--- a/wdl-format/src/token.rs
+++ b/wdl-format/src/token.rs
@@ -3,11 +3,12 @@
 mod post;
 mod pre;
 
+use std::fmt::Display;
+
 pub use post::*;
 pub use pre::*;
 
 use crate::Config;
-use std::fmt::Display;
 
 /// Tokens that are streamable.
 pub trait Token: Eq + PartialEq {
diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
index caee2d21a..30a592f9b 100644
--- a/wdl-format/src/token/post.rs
+++ b/wdl-format/src/token/post.rs
@@ -47,7 +47,7 @@ impl std::fmt::Debug for PostToken {
 impl Token for PostToken {
     /// Returns a displayable version of the token.
     fn display(&self, config: &crate::Config) -> impl Display {
-        let indent = match config.indent(){
+        let indent = match config.indent() {
             Indent::Spaces(n) => " ".repeat((n).into()),
             Indent::Tabs(n) => "\t".repeat((n).into()),
         };

From 3cbf60bc66d40d91cd0cd7dcf955e55eef6e5d94 Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Tue, 15 Oct 2024 13:37:59 -0400
Subject: [PATCH 59/60] fix: display for Token

---
 wdl-format/src/token.rs      |  2 +-
 wdl-format/src/token/post.rs | 41 +++++++++++++++++++++++++++---------
 wdl-format/src/token/pre.rs  |  4 ++--
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
index 8b57bc523..21e3d8df6 100644
--- a/wdl-format/src/token.rs
+++ b/wdl-format/src/token.rs
@@ -13,7 +13,7 @@ use crate::Config;
 /// Tokens that are streamable.
 pub trait Token: Eq + PartialEq {
     /// Returns a displayable version of the token.
-    fn display(&self, config: &Config) -> impl Display;
+    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a;
 }
 
 /// A stream of tokens. Tokens in this case are either [`PreToken`]s or
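The `post.rs` hunk that follows replaces the `to_string` arms (one heap allocation per token) with a private wrapper struct that borrows the token and the config and does all of its work inside `fmt`. The general shape of that borrowing-display pattern, sketched with stand-in types:

```rust
use std::fmt;

struct Config {
    width: usize,
}

/// Borrows its inputs; nothing is allocated until `fmt` runs.
struct Padded<'a> {
    text: &'a str,
    config: &'a Config,
}

impl fmt::Display for Padded<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:>width$}", self.text, width = self.config.width)
    }
}

/// Mirrors the `display<'a>(...) -> impl Display + 'a` signature: the
/// returned value is tied to both borrows.
fn display<'a>(text: &'a str, config: &'a Config) -> impl fmt::Display + 'a {
    Padded { text, config }
}

fn main() {
    let config = Config { width: 8 };
    assert_eq!(display("wdl", &config).to_string(), "     wdl");
}
```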
diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
index 30a592f9b..62f7caea4 100644
--- a/wdl-format/src/token/post.rs
+++ b/wdl-format/src/token/post.rs
@@ -46,16 +46,37 @@ impl std::fmt::Debug for PostToken {
 
 impl Token for PostToken {
     /// Returns a displayable version of the token.
-    fn display(&self, config: &crate::Config) -> impl Display {
-        let indent = match config.indent() {
-            Indent::Spaces(n) => " ".repeat((n).into()),
-            Indent::Tabs(n) => "\t".repeat((n).into()),
-        };
-        match self {
-            Self::Space => SPACE.to_string(),
-            Self::Newline => NEWLINE.to_string(),
-            Self::Indent => indent,
-            Self::Literal(value) => value.to_string(),
+    fn display<'a>(&'a self, config: &'a crate::Config) -> impl Display + 'a {
+        struct Display<'a> {
+            token: &'a PostToken,
+            config: &'a crate::Config,
+        }
+
+        impl std::fmt::Display for Display<'_> {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                match self.token {
+                    PostToken::Space => write!(f, "{SPACE}"),
+                    PostToken::Newline => write!(f, "{NEWLINE}"),
+                    PostToken::Indent => {
+                        let (c, n) = match self.config.indent() {
+                            Indent::Spaces(n) => (' ', n),
+                            Indent::Tabs(n) => ('\t', n),
+                        };
+
+                        for _ in 0..n.get() {
+                            write!(f, "{c}")?;
+                        }
+
+                        Ok(())
+                    }
+                    PostToken::Literal(value) => write!(f, "{value}"),
+                }
+            }
+        }
+
+        Display {
+            token: self,
+            config,
         }
     }
 }
diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs
index 154c9469d..6ec41ed7d 100644
--- a/wdl-format/src/token/pre.rs
+++ b/wdl-format/src/token/pre.rs
@@ -96,8 +96,8 @@ impl std::fmt::Display for PreToken {
 
 impl Token for PreToken {
     /// Returns a displayable version of the token.
-    fn display(&self, _config: &crate::Config) -> impl std::fmt::Display {
-        format!("{}", self)
+    fn display<'a>(&'a self, _config: &'a crate::Config) -> impl std::fmt::Display {
+        self
     }
 }
 

From 286602d0d5d320bd947261af133aa2cb1a096e5e Mon Sep 17 00:00:00 2001
From: Andrew Frantz
Date: Tue, 15 Oct 2024 13:44:29 -0400
Subject: [PATCH 60/60] Update post.rs

---
 wdl-format/src/token/post.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
index 62f7caea4..1fe44316e 100644
--- a/wdl-format/src/token/post.rs
+++ b/wdl-format/src/token/post.rs
@@ -47,8 +47,11 @@ impl std::fmt::Debug for PostToken {
 impl Token for PostToken {
     /// Returns a displayable version of the token.
     fn display<'a>(&'a self, config: &'a crate::Config) -> impl Display + 'a {
+        /// A displayable version of a [`PostToken`].
         struct Display<'a> {
+            /// The token to display.
             token: &'a PostToken,
+            /// The configuration to use.
             config: &'a crate::Config,
         }
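A closing observation on patch 57's `DEFAULT_INDENT`: the `unsafe { NonZeroUsize::new_unchecked(4) }` there is sound for a literal `4`, but the same compile-time constant can be built without `unsafe`. A sketch of that alternative (not what the patch does):

```rust
use std::num::NonZeroUsize;

/// `NonZeroUsize::new` is a `const fn`, so the zero check runs at compile
/// time; a zero literal here becomes a compile error instead of UB.
const DEFAULT_WIDTH: NonZeroUsize = match NonZeroUsize::new(4) {
    Some(n) => n,
    None => panic!("default indent width must be non-zero"),
};

fn main() {
    assert_eq!(DEFAULT_WIDTH.get(), 4);
}
```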