diff --git a/.env.example b/.env.example
index 956931c18..0e6d51d58 100644
--- a/.env.example
+++ b/.env.example
@@ -29,9 +29,13 @@ SYNC_DESTINATION=123.456.789.123:~/nextclade
 
 # URL of Nextclade datasets server. See: https://github.com/neherlab/nextclade_data
 # Replace this with `http://localhost:27722` to use local data server instead
-DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org
+DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3
 # DATA_FULL_DOMAIN=http://localhost:27722
 
+# If enabled, Nextclade Web will first attempt to fetch datasets from the corresponding GitHub branch. If this attempt
+# fails, it will use `DATA_FULL_DOMAIN` as usual.
+DATA_TRY_GITHUB_BRANCH=0
+
 # Directory path (relative to the root of the project) from which local data server takes the data.
 # Useful for local testing on new datasets. See: https://github.com/neherlab/nextclade_data
 # It is recommended to keep the `nextclade_data` git repo in a sibling directory of `nextclade` git repo.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 71c9ea5cb..3ba13e0ce 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,2 +1 @@
-vers
 blank_issues_enabled: false
diff --git a/.github/workflows/cli.yml b/.github/workflows/cli.yml
index 4121a10d3..bb8b1bc0f 100644
--- a/.github/workflows/cli.yml
+++ b/.github/workflows/cli.yml
@@ -57,17 +57,17 @@ jobs:
       - name: "Setup environment (release)"
         if: endsWith(github.ref, '/release-cli')
         run: |
-          echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org/v3" >> $GITHUB_ENV
 
       - name: "Setup environment (staging)"
         if: endsWith(github.ref, '/staging-cli')
         run: |
-          echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org/v3" >> $GITHUB_ENV
 
       - name: "Setup environment (master)"
         if: ${{ !endsWith(github.ref, '/staging-cli') && !endsWith(github.ref, '/release-cli') }}
         run: |
-          echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3" >> $GITHUB_ENV
 
       - name: "Checkout code"
         uses: actions/checkout@v3
@@ -106,7 +106,7 @@ jobs:
         run: |
           cp .env.example .env
           sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
-          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
+          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
 
       - name: "Login to Docker Hub"
         uses: docker/login-action@v2
@@ -170,7 +170,6 @@ jobs:
         run: |
           cp .env.example .env
           sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
-          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|g" .env
 
       - name: "Run unit tests"
         run: |
@@ -217,7 +216,6 @@ jobs:
         run: |
           cp .env.example .env
           sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
-          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|g" .env
 
       - name: "Run lints"
         run: |
diff --git a/.github/workflows/web.yml b/.github/workflows/web.yml
index a61a80c57..062bdd39c 100644
--- a/.github/workflows/web.yml
+++ b/.github/workflows/web.yml
@@ -37,7 +37,7 @@ jobs:
         run: |
           echo "ENV_NAME=release" >> $GITHUB_ENV
           echo "FULL_DOMAIN=https://clades.nextstrain.org" >> $GITHUB_ENV
-          echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org/v3" >> $GITHUB_ENV
           echo "PLAUSIBLE_IO_DOMAIN=clades.nextstrain.org" >> $GITHUB_ENV
 
       - name: "Setup environment (staging)"
@@ -45,7 +45,7 @@ jobs:
         run: |
           echo "ENV_NAME=staging" >> $GITHUB_ENV
           echo "FULL_DOMAIN=https://staging.clades.nextstrain.org" >> $GITHUB_ENV
-          echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org/v3" >> $GITHUB_ENV
           echo "PLAUSIBLE_IO_DOMAIN=staging.clades.nextstrain.org" >> $GITHUB_ENV
 
       - name: "Setup environment (master)"
@@ -53,7 +53,7 @@ jobs:
         run: |
           echo "ENV_NAME=master" >> $GITHUB_ENV
           echo "FULL_DOMAIN=https://master.clades.nextstrain.org" >> $GITHUB_ENV
-          echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org" >> $GITHUB_ENV
+          echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3" >> $GITHUB_ENV
           echo "PLAUSIBLE_IO_DOMAIN=master.clades.nextstrain.org" >> $GITHUB_ENV
 
       - name: "Checkout code"
@@ -107,7 +107,7 @@ jobs:
         run: |
           cp .env.example .env
           sed -i -e "s|FULL_DOMAIN=autodetect|FULL_DOMAIN=${FULL_DOMAIN}|g" .env
-          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
+          sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
 
       - name: "Login to Docker Hub"
         uses: docker/login-action@v2
diff --git a/.readthedocs.yml b/.readthedocs.yml
index e3c0f2ec2..3f6d5d02b 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,6 +1,11 @@
 ---
 version: 2
 
+build:
+  os: "ubuntu-22.04"
+  tools:
+    python: "mambaforge-22.9"
+
 conda:
   environment: docs/environment.yml
 
diff --git a/Cargo.lock b/Cargo.lock
index f21399925..482e7bd77 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -75,16 +75,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anstream"
-version = "0.3.2"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
+checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c"
 dependencies = [
  "anstyle",
  "anstyle-parse",
  "anstyle-query",
  "anstyle-wincon",
  "colorchoice",
- "is-terminal",
  "utf8parse",
 ]
 
@@ -114,9 +113,9 @@ dependencies = [
 
 [[package]]
 name = "anstyle-wincon"
-version = "1.0.1"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
+checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd"
 dependencies = [
  "anstyle",
  "windows-sys",
@@ -469,51 +468,42 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.3.10"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a"
+checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
 dependencies = [
  "clap_builder",
  "clap_derive",
- "once_cell",
-]
-
-[[package]]
-name = "clap-verbosity-flag"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1eef05769009513df2eb1c3b4613e7fad873a14c600ff025b08f250f59fee7de"
-dependencies = [
- "clap",
- "log",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.3.10"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d"
+checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
 dependencies = [
  "anstream",
  "anstyle",
  "clap_lex",
  "strsim",
+ "unicase",
+ "unicode-width",
 ]
 
 [[package]]
 name = "clap_complete"
-version = "4.3.1"
+version = "4.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f6b5c519bab3ea61843a7923d074b04245624bb84a64a8c150f5deb014e388b"
+checksum = "4110a1e6af615a9e6d0a36f805d5c99099f8bab9b8042f5bc1fa220a4a89e36f"
 dependencies = [
  "clap",
 ]
 
 [[package]]
 name = "clap_complete_fig"
-version = "4.3.1"
+version = "4.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99fee1d30a51305a6c2ed3fc5709be3c8af626c9c958e04dd9ae94e27bcbce9f"
+checksum = "9e9bae21b3f6eb417ad3054c8b1094aa0542116eba4979b1b271baefbfa6b965"
 dependencies = [
  "clap",
  "clap_complete",
@@ -521,9 +511,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.3.2"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f"
+checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -1712,7 +1702,7 @@ dependencies = [
 
 [[package]]
 name = "nextclade"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 dependencies = [
  "assert2",
  "atty",
@@ -1722,7 +1712,6 @@ dependencies = [
  "bzip2",
  "chrono",
  "clap",
- "clap-verbosity-flag",
  "clap_complete",
  "clap_complete_fig",
  "color-eyre",
@@ -1748,6 +1737,7 @@ dependencies = [
  "num-traits",
  "num_cpus",
  "optfield",
+ "ordered-float",
  "owo-colors",
  "pretty_assertions",
  "rayon",
@@ -1761,6 +1751,7 @@ dependencies = [
  "serde_repr",
  "serde_stacker",
  "serde_yaml",
+ "strsim",
  "strum 0.25.0",
  "strum_macros 0.25.0",
  "tinytemplate",
@@ -1775,7 +1766,7 @@ dependencies = [
 
 [[package]]
 name = "nextclade-cli"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 dependencies = [
  "assert2",
  "clap",
@@ -1796,6 +1787,7 @@ dependencies = [
  "log",
  "nextclade",
  "num_cpus",
+ "ordered-float",
  "owo-colors",
  "pretty_assertions",
  "rayon",
@@ -1809,15 +1801,17 @@ dependencies = [
  "serde_json",
  "strum 0.25.0",
  "strum_macros 0.25.0",
+ "tinytemplate",
  "url",
  "zip",
 ]
 
 [[package]]
 name = "nextclade-web"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 dependencies = [
  "assert2",
+ "chrono",
  "console_error_panic_hook",
  "eyre",
  "getrandom",
@@ -1955,11 +1949,14 @@ dependencies = [
 
 [[package]]
 name = "ordered-float"
-version = "3.7.0"
+version = "3.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213"
+checksum = "2a54938017eacd63036332b4ae5c8a49fc8c0c1d6d629893057e4f13609edd06"
 dependencies = [
  "num-traits",
+ "rand",
+ "schemars",
+ "serde",
 ]
 
 [[package]]
@@ -2175,6 +2172,7 @@ dependencies = [
  "libc",
  "rand_chacha",
  "rand_core",
+ "serde",
 ]
 
 [[package]]
@@ -2194,6 +2192,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
  "getrandom",
+ "serde",
 ]
 
 [[package]]
@@ -2514,6 +2513,9 @@ name = "semver"
 version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "serde"
@@ -3029,6 +3031,15 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
 
+[[package]]
+name = "unicase"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.13"
diff --git a/docker-dev b/docker-dev
index 43da950ea..fc8d69e62 100755
--- a/docker-dev
+++ b/docker-dev
@@ -545,10 +545,8 @@ ${NICE} docker run --rm \
 if [ -n "${CROSS:-}" ] && [ -n "${RELEASE:-}" ] && { [ "${BUILD:-}" == 1 ] || [ "${RUN:-}" == 1 ]; }; then
   mkdir -p .out/
   if [[ "${CROSS}" == *windows* ]]; then
-    cp "${BUILD_DIR}/${CROSS}/release/nextalign.exe" ".out/nextalign-${CROSS}.exe"
     cp "${BUILD_DIR}/${CROSS}/release/nextclade.exe" ".out/nextclade-${CROSS}.exe"
   else
-    cp "${BUILD_DIR}/${CROSS}/release/nextalign" ".out/nextalign-${CROSS}"
     cp "${BUILD_DIR}/${CROSS}/release/nextclade" ".out/nextclade-${CROSS}"
   fi
 fi
diff --git a/docker/docker-prod-alpine.dockerfile b/docker/docker-prod-alpine.dockerfile
index 9fa498867..3117a3531 100644
--- a/docker/docker-prod-alpine.dockerfile
+++ b/docker/docker-prod-alpine.dockerfile
@@ -1,8 +1,8 @@
 FROM alpine:3
 
 COPY .out/nextclade-x86_64-unknown-linux-musl /usr/bin/nextclade
-COPY .out/nextalign-x86_64-unknown-linux-musl /usr/bin/nextalign
 
 RUN set -eux \
+&& ln -s /usr/bin/nextclade /usr/bin/nextalign \
 && ln -s /usr/bin/nextclade /nextclade \
 && ln -s /usr/bin/nextalign /nextalign
diff --git a/docker/docker-prod-debian.dockerfile b/docker/docker-prod-debian.dockerfile
index 9e17b5b3b..4ba48d45f 100644
--- a/docker/docker-prod-debian.dockerfile
+++ b/docker/docker-prod-debian.dockerfile
@@ -1,9 +1,9 @@
 FROM debian:11
 
 COPY .out/nextclade-x86_64-unknown-linux-gnu /usr/bin/nextclade
-COPY .out/nextalign-x86_64-unknown-linux-gnu /usr/bin/nextalign
 
 RUN set -eux \
+&& ln -s /usr/bin/nextclade /usr/bin/nextalign \
 && ln -s /usr/bin/nextclade /nextclade \
 && ln -s /usr/bin/nextalign /nextalign \
 && export DEBIAN_FRONTEND=noninteractive \
diff --git a/docker/docker-prod-scratch.dockerfile b/docker/docker-prod-scratch.dockerfile
index 17b3f08d4..94ed85b6d 100644
--- a/docker/docker-prod-scratch.dockerfile
+++ b/docker/docker-prod-scratch.dockerfile
@@ -1,4 +1,6 @@
 FROM scratch
 
 COPY .out/nextclade-x86_64-unknown-linux-musl /nextclade
-COPY .out/nextalign-x86_64-unknown-linux-musl /nextalign
+
+# `scratch` has no shell or `ln`, so `RUN ln -s ...` cannot execute here; provide the `nextalign` alias as a second copy of the binary
+COPY .out/nextclade-x86_64-unknown-linux-musl /nextalign
diff --git a/docs/user/input-files.md b/docs/user/input-files.md
index 48a666efa..9c1acb0a1 100644
--- a/docs/user/input-files.md
+++ b/docs/user/input-files.md
@@ -247,13 +247,13 @@ Nextclade Web (simple and advanced modes): accepted in "Sequences" drag & drop b
 
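-Nextclade CLI and Nextalign CLI accept fasta inputs as one or multiple positional arguments. Accepts plain or compressed FASTA files. If a compressed fasta file is provided, it will be transparently decompressed. Supported compression formats: `gz`, `bz2`, `xz`, `zstd`. Decompressor is chosen based on file extension. If there's multiple input files, then different files can have different compression formats. If positional arguments provided, the plain fasta input is read from standard input (stdin).
+Nextclade CLI and Nextalign CLI accept fasta inputs as one or multiple positional arguments. Accepts plain or compressed FASTA files. If a compressed fasta file is provided, it will be transparently decompressed. Supported compression formats: `gz`, `bz2`, `xz`, `zstd`. Decompressor is chosen based on file extension. If there are multiple input files, then different files can have different compression formats. If no positional arguments are provided, the plain fasta input is read from standard input (stdin).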
 
-Accepted formats: [FASTA](https://en.wikipedia.org/wiki/FASTA_format) or plain text (one sequence per line).
+Accepted formats: [FASTA](https://en.wikipedia.org/wiki/FASTA_format)
 
 ## Reference (root) sequence
 
 Viral nucleotide sequence which serves as a reference for alignment and the analysis. Mutations are called relative to the reference sequence. It is expected to be the root of the [reference tree](#reference-tree). The best results are obtained when the reference sequence is a well-known consensus genome, of a very high quality, preferably complete and unambiguous (spans entire genome and has no ambiguous nucleotides).
 
-Accepted formats: [FASTA](https://en.wikipedia.org/wiki/FASTA_format) or plain text. The file is expected to contain only 1 sequence.
+Accepted formats: [FASTA](https://en.wikipedia.org/wiki/FASTA_format) file containing exactly 1 sequence.
 
 Nextclade Web (advanced mode): accepted in "Root sequence" drag & drop box. A remote URL is also accepted in `input-root-sequence` URL parameter.
 
diff --git a/packages_rs/nextclade-cli/Cargo.toml b/packages_rs/nextclade-cli/Cargo.toml
index 36e87ce42..b82b3e730 100644
--- a/packages_rs/nextclade-cli/Cargo.toml
+++ b/packages_rs/nextclade-cli/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "nextclade-cli"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 description = "Alignment, mutation calling, phylogenetic placement, clade assignment and quality control checks for viral genetic sequences. CLI module."
 repository = "https://github.com/nextstrain/nextclade"
 documentation = "https://docs.nextstrain.org/projects/nextclade/en/stable/"
@@ -11,9 +11,9 @@ publish = false
 
 [dependencies]
 assert2 = "=0.3.11"
-clap = { version = "=4.3.10", features = ["derive"] }
-clap_complete = "=4.3.1"
-clap_complete_fig = "=4.3.1"
+clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] }
+clap_complete = "=4.4.1"
+clap_complete_fig = "=4.4.0"
 color-eyre = "=0.6.2"
 comfy-table = "=7.0.1"
 crossbeam = "=0.8.2"
@@ -28,17 +28,19 @@ lazy_static = "=1.4.0"
 log = "=0.4.19"
 nextclade = { path = "../nextclade" }
 num_cpus = "=1.16.0"
+ordered-float = { version = "=3.9.1", features = ["rand", "serde", "schemars"] }
 owo-colors = "=3.5.0"
 pretty_assertions = "=1.3.0"
 rayon = "=1.7.0"
 regex = "=1.8.4"
 reqwest = { version = "=0.11.18", default-features = false, features = ["blocking", "deflate", "gzip", "brotli", "socks", "rustls-tls"] }
 schemars = { version = "=0.8.12", features = ["chrono", "either", "enumset", "indexmap1"] }
-semver = "=1.0.17"
+semver = { version = "=1.0.17", features = ["serde"] }
 serde = { version = "=1.0.164", features = ["derive"] }
 serde_json = { version = "=1.0.99", features = ["preserve_order", "indexmap", "unbounded_depth"] }
 strum = "=0.25.0"
 strum_macros = "=0.25"
+tinytemplate = "=1.2.1"
 url = { version = "=2.4.0", features = ["serde"] }
 zip = { version = "=0.6.6", default-features = false, features = ["aes-crypto", "bzip2", "deflate", "time"] }
 
diff --git a/packages_rs/nextclade-cli/src/bin/featuretree.rs b/packages_rs/nextclade-cli/src/bin/featuretree.rs
deleted file mode 100644
index 27ca1c590..000000000
--- a/packages_rs/nextclade-cli/src/bin/featuretree.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-use clap::{Parser, ValueHint};
-use ctor::ctor;
-use eyre::Report;
-use log::LevelFilter;
-use nextclade::features::feature_tree::FeatureTree;
-use nextclade::io::json::{json_stringify, json_write, JsonPretty};
-use nextclade::io::yaml::yaml_write;
-use nextclade::utils::global_init::global_init;
-use nextclade::utils::global_init::setup_logger;
-use std::fmt::Debug;
-use std::path::PathBuf;
-
-#[ctor]
-fn init() {
-  global_init();
-}
-
-#[derive(Parser, Debug)]
-#[clap(name = "featuremap")]
-#[clap(author, version)]
-#[clap(verbatim_doc_comment)]
-pub struct FeaturemapArgs {
-  /// Path to input GFF3 file
-  #[clap(value_hint = ValueHint::FilePath)]
-  #[clap(hide_long_help = true, hide_short_help = true)]
-  pub input_feature_map: PathBuf,
-
-  /// Path to output file
-  #[clap(long, short = 'o')]
-  #[clap(value_hint = ValueHint::DirPath)]
-  pub output: Option<PathBuf>,
-
-  /// Print output in JSON format
-  #[clap(long)]
-  pub json: bool,
-}
-
-fn main() -> Result<(), Report> {
-  let args = FeaturemapArgs::parse();
-  setup_logger(LevelFilter::Warn);
-  let feature_tree = FeatureTree::from_gff3_file(args.input_feature_map)?;
-
-  if let Some(output) = args.output {
-    if output.ends_with("yaml") || output.ends_with("yml") {
-      yaml_write(output, &feature_tree)?;
-    } else {
-      json_write(output, &feature_tree, JsonPretty(true))?;
-    }
-  }
-
-  if args.json {
-    println!("{}\n", json_stringify(&feature_tree, JsonPretty(true))?);
-  } else {
-    println!("{}", &feature_tree.to_pretty_string()?);
-  }
-  Ok(())
-}
diff --git a/packages_rs/nextclade-cli/src/bin/genemap.rs b/packages_rs/nextclade-cli/src/bin/genemap.rs
deleted file mode 100644
index 27a53e0c9..000000000
--- a/packages_rs/nextclade-cli/src/bin/genemap.rs
+++ /dev/null
@@ -1,58 +0,0 @@
-use clap::{Parser, ValueHint};
-use ctor::ctor;
-use eyre::Report;
-use log::LevelFilter;
-use nextclade::gene::gene_map::GeneMap;
-use nextclade::gene::gene_map_display::gene_map_to_table_string;
-use nextclade::io::json::{json_stringify, json_write, JsonPretty};
-use nextclade::io::yaml::yaml_write;
-use nextclade::utils::global_init::global_init;
-use nextclade::utils::global_init::setup_logger;
-use std::fmt::Debug;
-use std::path::PathBuf;
-
-#[ctor]
-fn init() {
-  global_init();
-}
-
-#[derive(Parser, Debug)]
-#[clap(name = "genemap")]
-#[clap(author, version)]
-#[clap(verbatim_doc_comment)]
-pub struct GenemapArgs {
-  #[clap(value_hint = ValueHint::FilePath)]
-  #[clap(hide_long_help = true, hide_short_help = true)]
-  pub input_gene_map: PathBuf,
-
-  /// Path to output file
-  #[clap(long, short = 'o')]
-  #[clap(value_hint = ValueHint::DirPath)]
-  pub output: Option<PathBuf>,
-
-  /// Print output in JSON format
-  #[clap(long)]
-  pub json: bool,
-}
-
-fn main() -> Result<(), Report> {
-  let args = GenemapArgs::parse();
-  setup_logger(LevelFilter::Warn);
-  let gene_map = GeneMap::from_file(args.input_gene_map)?;
-
-  if let Some(output) = args.output {
-    if output.to_string_lossy().ends_with("yaml") || output.to_string_lossy().ends_with("yml") {
-      yaml_write(output, &gene_map)?;
-    } else {
-      json_write(output, &gene_map, JsonPretty(true))?;
-    }
-  }
-
-  if args.json {
-    println!("{}\n", json_stringify(&gene_map, JsonPretty(true))?);
-  } else {
-    println!("{}", gene_map_to_table_string(&gene_map)?);
-  }
-
-  Ok(())
-}
diff --git a/packages_rs/nextclade-cli/src/bin/nextalign.rs b/packages_rs/nextclade-cli/src/bin/nextalign.rs
deleted file mode 100644
index 7f23d05a6..000000000
--- a/packages_rs/nextclade-cli/src/bin/nextalign.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-use ctor::ctor;
-use eyre::Report;
-use nextclade::utils::global_init::global_init;
-use nextclade_cli::cli::nextalign_cli::nextalign_handle_cli_args;
-
-#[ctor]
-fn init() {
-  global_init();
-}
-
-fn main() -> Result<(), Report> {
-  nextalign_handle_cli_args()
-}
diff --git a/packages_rs/nextclade-cli/src/cli/mod.rs b/packages_rs/nextclade-cli/src/cli/mod.rs
index 99ee2c0ab..21edd855b 100644
--- a/packages_rs/nextclade-cli/src/cli/mod.rs
+++ b/packages_rs/nextclade-cli/src/cli/mod.rs
@@ -1,9 +1,8 @@
-pub mod nextalign_cli;
-pub mod nextalign_loop;
-pub mod nextalign_ordered_writer;
 pub mod nextclade_cli;
 pub mod nextclade_dataset_get;
 pub mod nextclade_dataset_list;
 pub mod nextclade_loop;
 pub mod nextclade_ordered_writer;
+pub mod nextclade_read_annotation;
+pub mod nextclade_seq_sort;
 pub mod verbosity;
diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_cli.rs b/packages_rs/nextclade-cli/src/cli/nextalign_cli.rs
deleted file mode 100644
index 5a16a16c1..000000000
--- a/packages_rs/nextclade-cli/src/cli/nextalign_cli.rs
+++ /dev/null
@@ -1,464 +0,0 @@
-use crate::cli::nextalign_loop::nextalign_run;
-use crate::cli::nextclade_cli::{check_shells, SHELLS};
-use crate::cli::verbosity::{Verbosity, WarnLevel};
-use clap::{CommandFactory, Parser, Subcommand, ValueEnum, ValueHint};
-use clap_complete::{generate, Generator, Shell};
-use clap_complete_fig::Fig;
-use eyre::{eyre, ContextCompat, Report, WrapErr};
-use itertools::Itertools;
-use nextclade::align::params::AlignPairwiseParamsOptional;
-use nextclade::io::fs::add_extension;
-use nextclade::make_error;
-use nextclade::utils::global_init::setup_logger;
-use std::fmt::Debug;
-use std::io;
-use std::path::PathBuf;
-use strum::IntoEnumIterator;
-use strum_macros::EnumIter;
-
-#[derive(Parser, Debug)]
-#[clap(name = "nextalign")]
-#[clap(author, version)]
-#[clap(verbatim_doc_comment)]
-/// Viral sequence alignment and translation.
-///
-/// Nextalign is a part of Nextstrain: https://nextstrain.org
-///
-/// Documentation: https://docs.nextstrain.org/projects/nextclade
-/// Nextclade Web: https://clades.nextstrain.org
-/// Publication:   https://doi.org/10.21105/joss.03773
-///
-/// Please read short help with `nextalign -h` and extended help with `nextalign --help`. Each subcommand has its own help, for example: `nextclade run --help`.
-pub struct NextalignArgs {
-  #[clap(subcommand)]
-  pub command: NextalignCommands,
-
-  /// Make output more quiet or more verbose
-  #[clap(flatten, next_help_heading = "  Verbosity")]
-  pub verbosity: Verbosity<WarnLevel>,
-}
-
-#[derive(Subcommand, Debug)]
-#[clap(verbatim_doc_comment)]
-pub enum NextalignCommands {
-  /// Generate shell completions.
-  ///
-  /// This will print the completions file contents to the console. Refer to your shell's documentation on how to install the completions.
-  ///
-  /// Example for Ubuntu Linux:
-  ///
-  ///    nextalign completions bash > ~/.local/share/bash-completion/nextalign
-  ///
-  Completions {
-    /// Name of the shell to generate appropriate completions
-    #[clap(value_name = "SHELL", default_value_t = String::from("bash"), value_parser = check_shells)]
-    shell: String,
-  },
-
-  /// Run alignment and translation.
-  ///
-  /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade run --help`.
-  Run(Box<NextalignRunArgs>),
-}
-
-#[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, EnumIter)]
-pub enum NextalignOutputSelection {
-  All,
-  Fasta,
-  Translations,
-  Insertions,
-  Errors,
-}
-
-#[derive(Parser, Debug)]
-pub struct NextalignRunInputArgs {
-  /// Path to one or multiple FASTA files with input sequences
-  ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". If no files provided, the plain fasta input is read from standard input (stdin).
-  ///
-  /// See: https://en.wikipedia.org/wiki/FASTA_format
-  #[clap(value_hint = ValueHint::FilePath)]
-  #[clap(display_order = 1)]
-  pub input_fastas: Vec<PathBuf>,
-
-  /// REMOVED. Use positional arguments instead.
-  ///
-  /// Example: nextalign run -D dataset/ -O out/ seq1.fasta seq2.fasta
-  #[clap(long, short = 'i', visible_alias("sequences"))]
-  #[clap(value_hint = ValueHint::FilePath)]
-  #[clap(hide_long_help = true, hide_short_help = true)]
-  pub input_fasta: Option<PathBuf>,
-
-  /// Path to a FASTA file containing reference sequence. This file should contain exactly 1 sequence.
-  ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
-  #[clap(long, short = 'r', visible_alias("reference"))]
-  #[clap(value_hint = ValueHint::FilePath)]
-  pub input_ref: PathBuf,
-
-  /// Path to a .gff file containing the gene map (genome annotation).
-  ///
-  /// Gene map (sometimes also called 'genome annotation') is used to find coding regions. If not supplied, coding regions will
-  /// not be translated, amino acid sequences will not be output, and nucleotide sequence
-  /// alignment will not be informed by codon boundaries
-  ///
-  /// List of genes can be restricted using `--genes` flag. Otherwise all genes found in the gene map will be used.
-  ///
-  /// Learn more about Generic Feature Format Version 3 (GFF3):
-  /// https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
-  ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
-  #[clap(long, short = 'm', alias = "genemap")]
-  #[clap(value_hint = ValueHint::FilePath)]
-  pub input_gene_map: Option<PathBuf>,
-
-  /// Comma-separated list of names of genes to use.
-  ///
-  /// This defines which peptides will be written into outputs, and which genes will be taken into account during
-  /// codon-aware alignment. Must only contain gene names present in the gene map. If
-  /// this flag is not supplied or its value is an empty string, then all genes found in the gene map will be used.
-  ///
-  /// Requires `--input-gene-map` to be specified.
-  #[clap(
-    long,
-    short = 'g',
-    num_args=1..,
-    use_value_delimiter = true
-  )]
-  #[clap(value_hint = ValueHint::FilePath)]
-  pub genes: Option<Vec<String>>,
-}
-
-#[derive(Parser, Debug)]
-pub struct NextalignRunOutputArgs {
-  /// REMOVED. Use `--output-all` instead
-  #[clap(long)]
-  #[clap(value_hint = ValueHint::DirPath)]
-  #[clap(hide_long_help = true, hide_short_help = true)]
-  pub output_dir: Option<PathBuf>,
-
-  /// Produce all of the output files into this directory, using default basename and predefined suffixes and extensions. This is equivalent to specifying each of the individual `--output-*` flags. Convenient when you want to receive all or most of output files into the same directory and don't care about their filenames.
-  ///
-  /// Output files can be optionally included or excluded using `--output-selection` flag.
-  /// The base filename can be set using `--output-basename` flag.
-  ///
-  /// If both the `--output-all` and individual `--output-*` flags are provided, each individual flag overrides the corresponding default output path.
-  ///
-  /// At least one of the output flags is required: `--output-all`, `--output-fasta`, `--output-translations`, `--output-insertions`, `--output-errors`
-  ///
-  /// If the required directory tree does not exist, it will be created.
-  #[clap(long, short = 'O')]
-  #[clap(value_hint = ValueHint::DirPath)]
-  pub output_all: Option<PathBuf>,
-
-  /// Set the base filename to use for output files.
-  ///
-  /// By default the base filename is extracted from the input sequences file (provided with `--input-fasta`).
-  ///
-  /// Only valid together with `--output-all` flag.
-  #[clap(long, short = 'n')]
-  #[clap(requires = "output_all")]
-  pub output_basename: Option<String>,
-
-  /// Restricts outputs for `--output-all` flag.
-  ///
-  /// Should contain a comma-separated list of names of output files to produce.
-  ///
-  /// If 'all' is present in the list, then all other entries are ignored and all outputs are produced.
-  ///
-  /// Only valid together with `--output-all` flag.
-  #[clap(
-    long,
-    short = 's',
-    num_args=1..,
-    use_value_delimiter = true
-  )]
-  #[clap(requires = "output_all")]
-  #[clap(value_enum)]
-  pub output_selection: Vec<NextalignOutputSelection>,
-
-  /// Path to output FASTA file with aligned sequences.
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
-  #[clap(long, short = 'o')]
-  #[clap(value_hint = ValueHint::AnyPath)]
-  pub output_fasta: Option<PathBuf>,
-
-  /// Template string for path to output fasta files containing translated and aligned peptides. A separate file will be generated for every gene.
-  /// The string should contain template variable `{gene}`, where the gene name will be substituted.
-  /// Make sure you properly quote and/or escape the curly braces, so that your shell, programming language or pipeline manager does not attempt to substitute the variables.
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
-  ///
-  /// Example for bash shell:
-  ///
-  ///   --output-translations='output_dir/gene_{gene}.translation.fasta'
-  #[clap(long, short = 'P')]
-  #[clap(value_hint = ValueHint::AnyPath)]
-  pub output_translations: Option<String>,
-
-  /// Path to output CSV file that contain insertions stripped from the reference alignment.
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
-  #[clap(long, short = 'I')]
-  #[clap(value_hint = ValueHint::AnyPath)]
-  pub output_insertions: Option<PathBuf>,
-
-  /// Path to output CSV file containing errors and warnings occurred during processing
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
-  #[clap(long, short = 'e')]
-  #[clap(value_hint = ValueHint::AnyPath)]
-  pub output_errors: Option<PathBuf>,
-
-  /// Whether to include aligned reference nucleotide sequence into output nucleotide sequence FASTA file and reference peptides into output peptide FASTA files.
-  #[clap(long)]
-  pub include_reference: bool,
-
-  /// Emit output sequences in-order.
-  ///
-  /// With this flag the program will wait for results from the previous sequences to be written to the output files before writing the results of the next sequences, preserving the same order as in the input file. Due to variable sequence processing times, this might introduce unnecessary waiting times, but ensures that the resulting sequences are written in the same order as they occur in the inputs (except for sequences which have errors).
-  /// By default, without this flag, processing might happen out of order, which is faster, due to the elimination of waiting, but might also lead to results written out of order - the order of results is not specified and depends on thread scheduling and processing times of individual sequences.
-  ///
-  /// This option is only relevant when `--jobs` is greater than 1 or is omitted.
-  ///
-  /// Note: the sequences which trigger errors during processing will be omitted from outputs, regardless of this flag.
-  #[clap(long)]
-  pub in_order: bool,
-
-  /// Replace unknown nucleotide characters with 'N'
-  ///
-  /// By default, the sequences containing unknown nucleotide nucleotide characters are skipped with a warning - they
-  /// are not aligned and not included into results. If this flag is provided, then before the alignment,
-  /// all unknown characters are replaced with 'N'. This replacement allows to align these sequences.
-  ///
-  /// The following characters are considered known:  '-', 'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M', 'N', 'R', 'S', 'T', 'V', 'W', 'Y'
-  #[clap(long)]
-  pub replace_unknown: bool,
-}
-
-#[derive(Parser, Debug)]
-pub struct NextalignRunOtherArgs {
-  /// Number of processing jobs. If not specified, all available CPU threads will be used.
-  #[clap(global = false, long, short = 'j', default_value_t = num_cpus::get())]
-  pub jobs: usize,
-}
-
-#[derive(Parser, Debug)]
-pub struct NextalignRunArgs {
-  #[clap(flatten, next_help_heading = "  Inputs")]
-  pub inputs: NextalignRunInputArgs,
-
-  #[clap(flatten, next_help_heading = "  Outputs")]
-  pub outputs: NextalignRunOutputArgs,
-
-  #[clap(flatten, next_help_heading = "  Alignment parameters")]
-  pub alignment_params: AlignPairwiseParamsOptional,
-
-  #[clap(flatten, next_help_heading = "  Other")]
-  pub other: NextalignRunOtherArgs,
-}
-
-fn generate_completions(shell: &str) -> Result<(), Report> {
-  let mut command = NextalignArgs::command();
-
-  if shell.to_lowercase() == "fig" {
-    generate(Fig, &mut command, "nextalign", &mut io::stdout());
-    return Ok(());
-  }
-
-  let generator = Shell::from_str(&shell.to_lowercase(), true)
-    .map_err(|err| eyre!("{}: Possible values: {}", err, SHELLS.join(", ")))?;
-
-  let bin_name = command.get_name().to_owned();
-
-  generate(generator, &mut command, bin_name, &mut io::stdout());
-
-  Ok(())
-}
-
-/// Get output filenames provided by user or, if not provided, create filenames based on input fasta
-pub fn nextalign_get_output_filenames(run_args: &mut NextalignRunArgs) -> Result<(), Report> {
-  let NextalignRunArgs {
-    inputs:
-      NextalignRunInputArgs {
-        input_fastas,
-        input_ref,
-        input_gene_map,
-        genes,
-        ..
-      },
-    outputs:
-      NextalignRunOutputArgs {
-        output_all,
-        output_basename,
-        output_selection,
-        output_fasta,
-        output_translations,
-        output_insertions,
-        output_errors,
-        include_reference,
-        in_order,
-        ..
-      },
-    other: NextalignRunOtherArgs { jobs },
-    alignment_params,
-  } = run_args;
-
-  // If `--output-all` is provided, then we need to deduce default output filenames,
-  // while taking care to preserve values of any individual `--output-*` flags,
-  // as well as to honor restrictions put by the `--output-selection` flag, if provided.
-  if let Some(output_all) = output_all {
-    let output_basename = output_basename.clone().unwrap_or_else(|| "nextalign".to_owned());
-
-    let default_output_file_path = output_all.join(&output_basename);
-
-    // If `--output-selection` is empty or contains `all`, then fill it with all possible variants
-    if output_selection.is_empty() || output_selection.contains(&NextalignOutputSelection::All) {
-      *output_selection = NextalignOutputSelection::iter().collect_vec();
-    }
-
-    // We use `Option::get_or_insert()` mutable method here in order
-    // to set default output filenames only if they are not provided.
-
-    if output_selection.contains(&NextalignOutputSelection::Fasta) {
-      output_fasta.get_or_insert(add_extension(&default_output_file_path, "aligned.fasta"));
-    }
-
-    if output_selection.contains(&NextalignOutputSelection::Insertions) {
-      let output_insertions =
-        output_insertions.get_or_insert(add_extension(&default_output_file_path, "insertions.csv"));
-    }
-
-    if output_selection.contains(&NextalignOutputSelection::Errors) {
-      let output_errors = output_errors.get_or_insert(add_extension(&default_output_file_path, "errors.csv"));
-    }
-
-    if output_selection.contains(&NextalignOutputSelection::Translations) {
-      let output_translations = {
-        let output_translations_path =
-          default_output_file_path.with_file_name(format!("{output_basename}_gene_{{gene}}"));
-        let output_translations_path = add_extension(output_translations_path, "translation.fasta");
-
-        let output_translations_template = output_translations_path
-          .to_str()
-          .wrap_err_with(|| format!("When converting path to string: '{output_translations_path:?}'"))?
-          .to_owned();
-
-        output_translations.get_or_insert(output_translations_template)
-      };
-    }
-  }
-
-  if let Some(output_translations) = output_translations {
-    if !output_translations.contains("{gene}") {
-      return make_error!(
-        r#"
-Expected `--output-translations` argument to contain a template string containing template variable {{gene}} (with curly braces), but received:
-
-  {output_translations}
-
-Make sure the variable is not substituted by your shell, programming language or workflow manager. Apply proper escaping as needed.
-Example for bash shell:
-
-  --output-translations='output_dir/gene_{{gene}}.translation.fasta'
-
-      "#
-      );
-    }
-  }
-
-  let all_outputs_are_missing = [output_all, output_fasta, output_insertions, output_errors]
-    .iter()
-    .all(|o| o.is_none())
-    && output_translations.is_none();
-
-  if all_outputs_are_missing {
-    return make_error!(
-      r#"No output flags provided.
-
-At least one of the following flags is required:
-  --output-all
-  --output-fasta
-  --output-translations
-  --output-insertions
-  --output-errors"#
-    );
-  }
-
-  Ok(())
-}
-
-const ERROR_MSG_INPUT_FASTA_REMOVED: &str = r#"The argument `--input-fasta` (alias: `--sequences`, `-i`) is removed in favor of positional arguments.
-
-Try:
-
-  nextalign run -r ref.fasta -m genemap.gff -O out/ seq1.fasta seq2.fasta
-
-                                                       ^          ^
-                                              one or multiple positional arguments
-                                                with paths to input fasta files
-
-
-When positional arguments are not provided, nextalign will read input fasta from standard input.
-
-For more information, type
-
-  nextalign run --help"#;
-
-const ERROR_MSG_OUTPUT_DIR_REMOVED: &str = r#"The argument `--output-dir` is removed in favor of `--output-all`.
-
-When provided, `--output-all` allows to write all possible outputs into a directory.
-
-The defaut base name of the files can be overriden with `--output-basename` argument.
-
-The set of output files can be restricted with `--output-selection` argument.
-
-For more information, type:
-
-  nextalign run --help"#;
-
-pub fn nextalign_check_removed_args(run_args: &mut NextalignRunArgs) -> Result<(), Report> {
-  if run_args.inputs.input_fasta.is_some() {
-    return make_error!("{ERROR_MSG_INPUT_FASTA_REMOVED}");
-  }
-
-  if run_args.outputs.output_dir.is_some() {
-    return make_error!("{ERROR_MSG_OUTPUT_DIR_REMOVED}");
-  }
-
-  Ok(())
-}
-
-pub fn nextalign_handle_cli_args() -> Result<(), Report> {
-  let args = NextalignArgs::parse();
-
-  setup_logger(args.verbosity.get_filter_level());
-
-  match args.command {
-    NextalignCommands::Completions { shell } => {
-      generate_completions(&shell).wrap_err_with(|| format!("When generating completions for shell '{shell}'"))
-    }
-    NextalignCommands::Run(mut run_args) => {
-      nextalign_check_removed_args(&mut run_args)?;
-      nextalign_get_output_filenames(&mut run_args).wrap_err("When deducing output filenames")?;
-      nextalign_run(*run_args)
-    }
-  }
-}
diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs b/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs
deleted file mode 100644
index 2e8c7b95c..000000000
--- a/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs
+++ /dev/null
@@ -1,183 +0,0 @@
-use crate::cli::nextalign_cli::{
-  NextalignRunArgs, NextalignRunInputArgs, NextalignRunOtherArgs, NextalignRunOutputArgs,
-};
-use crate::cli::nextalign_ordered_writer::NextalignOrderedWriter;
-use eyre::{Report, WrapErr};
-use log::info;
-use nextclade::align::gap_open::{get_gap_open_close_scores_codon_aware, get_gap_open_close_scores_flat};
-use nextclade::align::params::AlignPairwiseParams;
-use nextclade::align::seed_match2::CodonSpacedIndex;
-use nextclade::alphabet::nuc::{to_nuc_seq, to_nuc_seq_replacing};
-use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
-use nextclade::gene::gene_map_display::gene_map_to_table_string;
-use nextclade::io::fasta::{read_one_fasta, FastaReader, FastaRecord};
-use nextclade::run::nextalign_run_one::nextalign_run_one;
-use nextclade::translate::translate_genes_ref::translate_genes_ref;
-use nextclade::types::outputs::NextalignOutputs;
-
-pub struct NextalignRecord {
-  pub index: usize,
-  pub seq_name: String,
-  pub outputs_or_err: Result<NextalignOutputs, Report>,
-}
-
-pub fn nextalign_run(run_args: NextalignRunArgs) -> Result<(), Report> {
-  info!("Command-line arguments:\n{run_args:#?}");
-
-  let NextalignRunArgs {
-    inputs:
-      NextalignRunInputArgs {
-        input_fastas,
-        input_ref,
-        input_gene_map,
-        genes,
-        ..
-      },
-    outputs:
-      NextalignRunOutputArgs {
-        output_all,
-        output_basename,
-        output_selection,
-        output_fasta,
-        output_translations,
-        output_insertions,
-        output_errors,
-        include_reference,
-        replace_unknown,
-        in_order,
-        ..
-      },
-    other: NextalignRunOtherArgs { jobs },
-    alignment_params: alignment_params_from_cli,
-  } = run_args;
-
-  let mut alignment_params = AlignPairwiseParams::default();
-
-  // Merge alignment params coming from CLI arguments
-  alignment_params.merge_opt(alignment_params_from_cli);
-
-  let ref_record = &read_one_fasta(input_ref)?;
-  let ref_seq = &to_nuc_seq(&ref_record.seq).wrap_err("When reading reference sequence")?;
-  let seed_index = &CodonSpacedIndex::from_sequence(ref_seq);
-
-  let gene_map = match input_gene_map {
-    Some(input_gene_map) => {
-      let gene_map = GeneMap::from_file(input_gene_map)?;
-      filter_gene_map(Some(gene_map), &genes)?
-    }
-    None => GeneMap::new(),
-  };
-
-  info!("Gene map:\n{}", gene_map_to_table_string(&gene_map)?);
-
-  let gap_open_close_nuc = &get_gap_open_close_scores_codon_aware(ref_seq, &gene_map, &alignment_params);
-  let gap_open_close_aa = &get_gap_open_close_scores_flat(ref_seq, &alignment_params);
-
-  let ref_peptides = &translate_genes_ref(ref_seq, &gene_map, &alignment_params)?;
-
-  std::thread::scope(|s| {
-    const CHANNEL_SIZE: usize = 128;
-    let (fasta_sender, fasta_receiver) = crossbeam_channel::bounded::<FastaRecord>(CHANNEL_SIZE);
-    let (result_sender, result_receiver) = crossbeam_channel::bounded::<NextalignRecord>(CHANNEL_SIZE);
-
-    s.spawn(|| {
-      let mut reader = FastaReader::from_paths(&input_fastas).unwrap();
-      loop {
-        let mut record = FastaRecord::default();
-        reader.read(&mut record).unwrap();
-        if record.is_empty() {
-          break;
-        }
-        fasta_sender
-          .send(record)
-          .wrap_err("When sending a FastaRecord")
-          .unwrap();
-      }
-      drop(fasta_sender);
-    });
-
-    let gene_map = &gene_map;
-    for _ in 0..jobs {
-      let fasta_receiver = fasta_receiver.clone();
-      let result_sender = result_sender.clone();
-      let gap_open_close_nuc = &gap_open_close_nuc;
-      let gap_open_close_aa = &gap_open_close_aa;
-      let alignment_params = &alignment_params;
-
-      s.spawn(move || {
-        let result_sender = result_sender.clone();
-
-        for FastaRecord { seq_name, seq, index } in &fasta_receiver {
-          info!("Processing sequence '{seq_name}'");
-
-          let outputs_or_err = if replace_unknown {
-            Ok(to_nuc_seq_replacing(&seq))
-          } else {
-            to_nuc_seq(&seq)
-          }
-          .wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'"))
-          .and_then(|qry_seq| {
-            nextalign_run_one(
-              index,
-              &seq_name,
-              &qry_seq,
-              ref_seq,
-              seed_index,
-              ref_peptides,
-              gene_map,
-              gap_open_close_nuc,
-              gap_open_close_aa,
-              alignment_params,
-            )
-          });
-
-          let record = NextalignRecord {
-            index,
-            seq_name,
-            outputs_or_err,
-          };
-
-          // Important: **all** records should be sent into this channel, without skipping.
-          // In in-order mode, writer that receives from this channel expects a contiguous stream of indices. Gaps in
-          // the indices will cause writer to stall waiting for the missing index and the buffering queue to grow. Any
-          // filtering of records should be done in the writer, instead of here.
-          result_sender
-            .send(record)
-            .wrap_err("When sending NextalignRecord")
-            .unwrap();
-        }
-
-        drop(result_sender);
-      });
-    }
-
-    s.spawn(move || {
-      let mut output_writer = NextalignOrderedWriter::new(
-        gene_map,
-        &output_fasta,
-        &output_translations,
-        &output_insertions,
-        &output_errors,
-        in_order,
-      )
-      .wrap_err("When creating output writer")
-      .unwrap();
-
-      if include_reference {
-        output_writer
-          .write_ref(ref_record, ref_peptides)
-          .wrap_err("When writing output record for ref sequence")
-          .unwrap();
-      }
-
-      for record in result_receiver {
-        output_writer
-          .write_record(record)
-          .wrap_err("When writing output record")
-          .unwrap();
-      }
-    });
-  });
-
-  Ok(())
-}
diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
deleted file mode 100644
index 9498ffe00..000000000
--- a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
+++ /dev/null
@@ -1,184 +0,0 @@
-use crate::cli::nextalign_loop::NextalignRecord;
-use eyre::{Report, WrapErr};
-use log::{info, warn};
-use nextclade::alphabet::nuc::from_nuc_seq;
-use nextclade::gene::gene_map::GeneMap;
-use nextclade::io::errors_csv::ErrorsCsvWriter;
-use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
-use nextclade::io::insertions_csv::InsertionsCsvWriter;
-use nextclade::translate::translate_genes::Translation;
-use nextclade::types::outputs::NextalignOutputs;
-use nextclade::utils::error::report_to_string;
-use nextclade::utils::option::OptionMapRefFallible;
-use std::collections::HashMap;
-use std::path::PathBuf;
-
-/// Writes output files, potentially preserving the initial order of records (same as in the inputs)
-pub struct NextalignOrderedWriter<'a> {
-  fasta_writer: Option<FastaWriter>,
-  fasta_peptide_writer: Option<FastaPeptideWriter>,
-  insertions_csv_writer: Option<InsertionsCsvWriter>,
-  errors_csv_writer: Option<ErrorsCsvWriter<'a>>,
-  expected_index: usize,
-  queue: HashMap<usize, NextalignRecord>,
-  in_order: bool,
-}
-
-impl<'a> NextalignOrderedWriter<'a> {
-  pub fn new(
-    gene_map: &'a GeneMap,
-    output_fasta: &Option<PathBuf>,
-    output_translations: &Option<String>,
-    output_insertions: &Option<PathBuf>,
-    output_errors: &Option<PathBuf>,
-    in_order: bool,
-  ) -> Result<Self, Report> {
-    let fasta_writer = output_fasta.map_ref_fallible(FastaWriter::from_path)?;
-
-    let fasta_peptide_writer = output_translations
-      .map_ref_fallible(|output_translations| FastaPeptideWriter::new(gene_map, output_translations))?;
-
-    let insertions_csv_writer = output_insertions.map_ref_fallible(InsertionsCsvWriter::new)?;
-
-    let errors_csv_writer =
-      output_errors.map_ref_fallible(|output_errors| ErrorsCsvWriter::new(gene_map, output_errors))?;
-
-    Ok(Self {
-      fasta_writer,
-      fasta_peptide_writer,
-      insertions_csv_writer,
-      errors_csv_writer,
-      expected_index: 0,
-      queue: HashMap::<usize, NextalignRecord>::new(),
-      in_order,
-    })
-  }
-
-  pub fn write_ref(&mut self, ref_record: &FastaRecord, ref_translation: &Translation) -> Result<(), Report> {
-    let FastaRecord { seq_name, seq, .. } = &ref_record;
-
-    if let Some(fasta_writer) = &mut self.fasta_writer {
-      fasta_writer.write(seq_name, seq, false)?;
-    }
-
-    ref_translation.cdses().try_for_each(|cds_tr| {
-      if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
-        fasta_peptide_writer.write(seq_name, cds_tr)?;
-      }
-      Result::<(), Report>::Ok(())
-    })?;
-
-    Ok(())
-  }
-
-  /// Writes output record into output files
-  fn write_impl(&mut self, record: &NextalignRecord) -> Result<(), Report> {
-    let NextalignRecord {
-      index,
-      seq_name,
-      outputs_or_err,
-    } = record;
-
-    match outputs_or_err {
-      Ok(output) => {
-        let NextalignOutputs {
-          stripped,
-          alignment,
-          translation,
-          aa_insertions,
-          warnings,
-          missing_genes,
-          is_reverse_complement,
-          ..
-        } = output;
-
-        if let Some(fasta_writer) = &mut self.fasta_writer {
-          fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq), *is_reverse_complement)?;
-        }
-
-        if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
-          for translation in translation.cdses() {
-            fasta_peptide_writer.write(seq_name, translation)?;
-          }
-        }
-
-        if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
-          insertions_csv_writer.write(seq_name, &stripped.insertions, aa_insertions)?;
-        }
-
-        for warning in warnings {
-          info!("In sequence #{index} '{seq_name}': {}", warning.warning);
-        }
-
-        if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
-          errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?;
-        }
-      }
-      Err(report) => {
-        let cause = report_to_string(report);
-        let message = format!(
-          "In sequence #{index} '{seq_name}': {cause}. Note that this sequence will not be included in the results."
-        );
-        warn!("{message}");
-        if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
-          insertions_csv_writer.write(seq_name, &[], &[])?;
-        }
-        if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
-          errors_csv_writer.write_nuc_error(seq_name, &message)?;
-        }
-      }
-    }
-
-    Ok(())
-  }
-
-  /// In in-order mode, writes all queued records with indices subsequent to the next expected index.
-  /// On out-of-order mode, does nothing - the queue is always empty.
-  fn write_queued_records(&mut self) -> Result<(), Report> {
-    while let Some(record) = self.queue.remove(&self.expected_index) {
-      self.write_impl(&record)?;
-      self.expected_index += 1;
-    }
-    Ok(())
-  }
-
-  /// Writes a record.
-  ///
-  /// In in-order mode, if one or more of the preceding records has not been written yet (according to the record index
-  /// derived from order of records in the input files) then the current record is queued to be written at a later time.
-  /// This ensures that the records in output files are in the same order as in the input files.
-  ///
-  /// In out-of-order mode, records are written as they come from worker threads. In this case the order in output files
-  /// is not defined (due to differences in processing times between items, and thread scheduling between runs)
-  pub fn write_record(&mut self, record: NextalignRecord) -> Result<(), Report> {
-    if !self.in_order {
-      // Out-of-order mode: write immediately
-      self.write_impl(&record)?;
-    } else {
-      // In-order mode: check if the record has next expected index
-      if record.index == self.expected_index {
-        // If the record has next expected index, write it immediately
-        self.write_impl(&record)?;
-        self.expected_index += 1;
-      } else {
-        // If the record has an unexpected index, queue it to write later
-        self.queue.insert(record.index, record);
-      }
-
-      // Periodically try to write the queued records
-      self.write_queued_records()?;
-    }
-    Ok(())
-  }
-
-  /// Finalizes output by writing all queued records
-  pub fn finish(&mut self) -> Result<(), Report> {
-    self.write_queued_records()
-  }
-}
-
-impl<'a> Drop for NextalignOrderedWriter<'a> {
-  fn drop(&mut self) {
-    self.finish().wrap_err("When finalizing output writer").unwrap();
-  }
-}
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs
index d63a96a03..c619138f2 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs
@@ -1,17 +1,20 @@
 use crate::cli::nextclade_dataset_get::nextclade_dataset_get;
 use crate::cli::nextclade_dataset_list::nextclade_dataset_list;
 use crate::cli::nextclade_loop::nextclade_run;
+use crate::cli::nextclade_read_annotation::nextclade_read_annotation;
+use crate::cli::nextclade_seq_sort::nextclade_seq_sort;
 use crate::cli::verbosity::{Verbosity, WarnLevel};
 use crate::io::http_client::ProxyConfig;
+use clap::builder::styling;
 use clap::{ArgGroup, CommandFactory, Parser, Subcommand, ValueEnum, ValueHint};
 use clap_complete::{generate, Generator, Shell};
 use clap_complete_fig::Fig;
 use eyre::{eyre, ContextCompat, Report, WrapErr};
 use itertools::Itertools;
 use lazy_static::lazy_static;
-use nextclade::align::params::AlignPairwiseParamsOptional;
 use nextclade::io::fs::add_extension;
-use nextclade::tree::params::TreeBuilderParamsOptional;
+use nextclade::run::params::NextcladeInputParamsOptional;
+use nextclade::sort::params::NextcladeSeqSortParams;
 use nextclade::utils::global_init::setup_logger;
 use nextclade::{getenv, make_error};
 use std::fmt::Debug;
@@ -28,17 +31,19 @@ lazy_static! {
   pub static ref SHELLS: Vec<&'static str> = ["bash", "elvish", "fish", "fig", "powershell", "zsh"].to_vec();
 }
 
-pub fn check_shells(value: &str) -> Result<String, Report> {
-  SHELLS
-    .contains(&value)
-    .then_some(value.to_owned())
-    .ok_or_else(|| eyre!("Unknown shell: '{value}'. Possible values: {}", SHELLS.join(", ")))
+fn styles() -> styling::Styles {
+  styling::Styles::styled()
+    .header(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD)
+    .usage(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD)
+    .literal(styling::AnsiColor::Blue.on_default() | styling::Effects::BOLD)
+    .placeholder(styling::AnsiColor::Cyan.on_default())
 }
 
 #[derive(Parser, Debug)]
 #[clap(name = "nextclade")]
 #[clap(author, version)]
 #[clap(verbatim_doc_comment)]
+#[clap(styles = styles())]
 /// Viral genome alignment, mutation calling, clade assignment, quality checks and phylogenetic placement.
 ///
 /// Nextclade is a part of Nextstrain: https://nextstrain.org
@@ -53,7 +58,7 @@ pub struct NextcladeArgs {
   pub command: NextcladeCommands,
 
   /// Make output more quiet or more verbose
-  #[clap(flatten, next_help_heading = "  Verbosity")]
+  #[clap(flatten, next_help_heading = "Verbosity")]
   pub verbosity: Verbosity<WarnLevel>,
 }
 
@@ -70,19 +75,29 @@ pub enum NextcladeCommands {
   ///
   Completions {
     /// Name of the shell to generate appropriate completions
-    #[clap(value_name = "SHELL", default_value_t = String::from("bash"), value_parser = check_shells)]
+    #[clap(value_name = "SHELL", default_value_t = String::from("bash"), value_parser = SHELLS.clone())]
     shell: String,
   },
 
-  /// Run alignment, mutation calling, clade assignment, quality checks and phylogenetic placement
+  /// Run sequence analysis: alignment, mutation calling, clade assignment, quality checks and phylogenetic placement
   ///
   /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade run --help`.
   Run(Box<NextcladeRunArgs>),
 
-  /// List and download available Nextclade datasets
+  /// List and download available Nextclade datasets (pathogens)
   ///
-  /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade run --help`.
+  /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade dataset --help`.
   Dataset(Box<NextcladeDatasetArgs>),
+
+  /// Sort sequences according to the inferred Nextclade dataset (pathogen)
+  ///
+  /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade sort --help`.
+  Sort(Box<NextcladeSortArgs>),
+
+  /// Read genome annotation and present it in Nextclade's internal formats. This is mostly only useful for Nextclade maintainers and the most curious users. Note that these internal formats have no stability guarantees and can be changed at any time without notice.
+  ///
+  /// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade read-annotation --help`.
+  ReadAnnotation(Box<NextcladeReadAnnotationArgs>),
 }
 
 #[derive(Parser, Debug)]
@@ -105,49 +120,80 @@ pub enum NextcladeDatasetCommands {
   Get(NextcladeDatasetGetArgs),
 }
 
+#[allow(clippy::struct_excessive_bools)]
 #[derive(Parser, Debug)]
 #[clap(verbatim_doc_comment)]
 pub struct NextcladeDatasetListArgs {
-  /// Restrict list to datasets with this name. Equivalent to `--attribute='name=<value>'`.
+  /// Restrict list to datasets with this exact name.
+  ///
+  /// Can be used to test if a dataset exists.
   #[clap(long, short = 'n')]
   #[clap(value_hint = ValueHint::Other)]
   pub name: Option<String>,
 
-  /// Restrict list to datasets based on this reference sequence (given its accession ID). Equivalent to `--attribute='reference=<value>'`.
-  ///
-  /// Special values: "all" - shows datasets with any reference sequences; "default" - show only datasets with default reference sequence (as defined by the author of a given dataset).
+  /// REMOVED
   #[clap(long, short = 'r')]
   #[clap(value_hint = ValueHint::Other)]
-  #[clap(default_value = "all")]
-  pub reference: String,
+  #[clap(hide_long_help = true, hide_short_help = true)]
+  pub reference: Option<String>,
 
-  /// Restrict list to datasets with this version tag. Equivalent to `--attribute='tag=<value>'`.
+  /// Restrict list to datasets with this exact version tag.
   #[clap(long, short = 't')]
   #[clap(value_hint = ValueHint::Other)]
-  #[clap(default_value = "latest")]
-  pub tag: String,
+  pub tag: Option<String>,
 
-  /// Restrict list to only datasets with a given combination of attribute key-value pairs.
-  /// Keys and values are separated with an equality sign.
-  /// This flag can occur multiple times, for multiple attributes.
-  /// Example: `--attribute='reference=MN908947' --attribute='tag=2022-04-28T12:00:00Z'`.
+  /// REMOVED
   #[clap(long, short = 'a')]
   #[clap(value_hint = ValueHint::Other)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub attribute: Vec<String>,
 
-  /// Include dataset version tags that are incompatible with this version of Nextclade CLI. By default the incompatible versions are omitted.
+  /// Include dataset versions that are incompatible with this version of Nextclade CLI.
+  ///
+  /// By default the incompatible versions are omitted.
   #[clap(long)]
   pub include_incompatible: bool,
 
-  /// Include older dataset version tags, additional to the latest.
+  /// REMOVED
   #[clap(long)]
-  pub include_old: bool,
+  #[clap(hide_long_help = true, hide_short_help = true)]
+  pub include_old: Option<bool>,
+
+  /// Include deprecated datasets.
+  ///
+  /// By default the deprecated datasets are omitted.
+  ///
+  /// Authors can mark a dataset as deprecated to express that the dataset will no longer be updated and/or supported. Reach out to dataset authors for concrete details.
+  #[clap(long)]
+  pub include_deprecated: bool,
+
+  /// Include experimental datasets.
+  ///
+  /// By default the experimental datasets are omitted.
+  ///
+  /// Authors can mark a dataset as experimental when development of the dataset is still in progress, or if the dataset is incomplete or of lower quality than usual. Use at your own risk. Reach out to dataset authors if interested in further development and stabilizing of a particular dataset, and consider contributing.
+  #[clap(long)]
+  pub include_experimental: bool,
+
+  /// Include community datasets.
+  ///
+  /// By default the community datasets are omitted.
+  ///
+  /// Community datasets are the datasets provided by the members of the broader Nextclade community. These datasets may vary in quality and completeness. Depending on authors' goals, these datasets may be created for specific purposes, rather than for general use. Nextclade team is unable to verify correctness of these datasets and does not provide support for them. For all questions regarding a concrete community dataset, please read its documentation and reach out to its authors.
+  #[clap(long)]
+  pub include_community: bool,
 
   /// Print output in JSON format.
   #[clap(long)]
   pub json: bool,
 
-  /// Use custom dataset server
+  /// Print only names of the datasets, without other details.
+  #[clap(long)]
+  pub only_names: bool,
+
+  /// Use custom dataset server.
+  ///
+  /// You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details.
   #[clap(long)]
   #[clap(value_hint = ValueHint::Url)]
   #[clap(default_value_t = Url::from_str(DATA_FULL_DOMAIN).expect("Invalid URL"))]
@@ -161,37 +207,33 @@ pub struct NextcladeDatasetListArgs {
 #[clap(verbatim_doc_comment)]
 #[clap(group(ArgGroup::new("outputs").required(true).multiple(false)))]
 pub struct NextcladeDatasetGetArgs {
-  /// Name of the dataset to download. Equivalent to `--attribute='name=<value>'`. Use `dataset list` command to view available datasets.
+  /// Name of the dataset to download. Type `nextclade dataset list` to view available datasets.
   #[clap(long, short = 'n')]
   #[clap(value_hint = ValueHint::Other)]
   pub name: String,
 
-  /// Download dataset based on this reference sequence (given its accession ID).
-  /// If this flag is not provided or is 'default', will download dataset based on current default reference sequence, as defined by dataset maintainers.
-  /// The default reference sequence can change over time. Use `dataset list` command to view available options.
-  /// Equivalent to `--attribute='reference=<value>'`.
+  /// REMOVED
   #[clap(long, short = 'r')]
   #[clap(value_hint = ValueHint::Other)]
-  #[clap(default_value = "default")]
-  pub reference: String,
+  #[clap(hide_long_help = true, hide_short_help = true)]
+  pub reference: Option<String>,
 
   /// Version tag of the dataset to download.
-  /// If this flag is not provided or is 'latest', then the latest **compatible** version is downloaded.
-  /// Equivalent to `--attribute='tag=<value>'`.
+  ///
+  /// If this flag is not provided the latest version is downloaded.
   #[clap(long, short = 't')]
   #[clap(value_hint = ValueHint::Other)]
-  #[clap(default_value = "latest")]
-  pub tag: String,
+  pub tag: Option<String>,
 
-  /// Download dataset with a given combination of attribute key-value pairs.
-  /// Keys and values are separated with an equality sign.
-  /// This flag can occur multiple times, for multiple attributes.
-  /// Example: `--attribute='reference=MN908947' --attribute='tag=2022-04-28T12:00:00Z'`.
+  /// REMOVED
   #[clap(long, short = 'a')]
   #[clap(value_hint = ValueHint::Other)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub attribute: Vec<String>,
 
-  /// Use custom dataset server
+  /// Use custom dataset server.
+  ///
+  /// You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details.
   #[clap(long)]
   #[clap(value_hint = ValueHint::Url)]
   #[clap(default_value_t = Url::from_str(DATA_FULL_DOMAIN).expect("Invalid URL"))]
@@ -234,19 +276,17 @@ pub enum NextcladeOutputSelection {
   Tree,
   TreeNwk,
   Translations,
-  Insertions,
-  Errors,
 }
 
 #[derive(Parser, Debug, Clone)]
 pub struct NextcladeRunInputArgs {
   /// Path to one or multiple FASTA files with input sequences
   ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". If no files provided, the plain fasta input is read from standard input (stdin).
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". If no files provided, the plain fasta input is read from standard input (stdin).
   ///
   /// See: https://en.wikipedia.org/wiki/FASTA_format
   #[clap(value_hint = ValueHint::FilePath)]
-  #[clap(display_order = 1)]
+  #[clap(display_order = 0)]
   pub input_fastas: Vec<PathBuf>,
 
   /// REMOVED. Use positional arguments instead.
@@ -261,12 +301,12 @@ pub struct NextcladeRunInputArgs {
   ///
   /// See `nextclade dataset --help` on how to obtain datasets.
   ///
-  /// If this flag is not provided, the following individual input flags are required: `--input-root-seq`,
-  /// `--input-tree`, `--input-qc-config`, and the following individual input files are recommended: `--input-gene-map`,
-  /// `--input-pcr-primers`.
+  /// If this flag is not provided, no dataset will be loaded and individual input files have to be provided instead. In this case, `--input-ref` is required and `--input-annotation`, `--input-tree` and `--input-pathogen-json` are optional.
   ///
   /// If both the `--input-dataset` and individual `--input-*` flags are provided, each individual flag overrides the
   /// corresponding file in the dataset.
+  ///
+  /// Please refer to Nextclade documentation for more details about Nextclade datasets and their files.
   #[clap(long, short = 'D')]
   #[clap(value_hint = ValueHint::AnyPath)]
   pub input_dataset: Option<PathBuf>,
@@ -287,7 +327,7 @@ pub struct NextcladeRunInputArgs {
   ///
   /// Overrides path to `reference.fasta` in the dataset (`--input-dataset`).
   ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
   #[clap(long, short = 'r', visible_alias("reference"), visible_alias("input-root-seq"))]
   #[clap(value_hint = ValueHint::FilePath)]
   pub input_ref: Option<PathBuf>,
@@ -298,61 +338,55 @@ pub struct NextcladeRunInputArgs {
   ///
   /// Overrides path to `tree.json` in the dataset (`--input-dataset`).
   ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
   #[clap(long, short = 'a')]
   #[clap(value_hint = ValueHint::FilePath)]
   pub input_tree: Option<PathBuf>,
 
-  /// Path to a JSON file containing configuration of Quality Control rules.
-  ///
-  /// Overrides path to `qc.json` in the dataset (`--input-dataset`).
-  ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// REMOVED. The qc.json file has been merged into pathogen.json, see `--input-pathogen-json`
   #[clap(long, short = 'Q')]
   #[clap(value_hint = ValueHint::FilePath)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub input_qc_config: Option<PathBuf>,
 
   /// Path to a JSON file containing configuration and data specific to a pathogen.
   ///
   /// Overrides path to `virus_properties.json` in the dataset (`--input-dataset`).
   ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
   #[clap(long, short = 'R')]
   #[clap(value_hint = ValueHint::FilePath)]
-  pub input_virus_properties: Option<PathBuf>,
+  pub input_pathogen_json: Option<PathBuf>,
 
-  /// Path to a CSV file containing a list of custom PCR primer sites. This information is used to report mutations in these sites.
-  ///
-  /// Overrides path to `primers.csv` in the dataset (`--input-dataset`).
-  ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// REMOVED. Merged into pathogen.json, see `--input-pathogen-json`
   #[clap(long, short = 'p')]
   #[clap(value_hint = ValueHint::FilePath)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub input_pcr_primers: Option<PathBuf>,
 
-  /// Path to a .gff file containing the gene map (genome annotation).
+  /// Path to a GFF3 file containing genome annotation.
   ///
-  /// Gene map (sometimes also called 'genome annotation') is used to find coding regions. If not supplied, coding regions will
+  /// Genome annotation is used to find coding regions. If not supplied, coding regions will
   /// not be translated, amino acid sequences will not be output, amino acid mutations will not be detected and nucleotide sequence
   /// alignment will not be informed by codon boundaries
   ///
-  /// List of genes can be restricted using `--genes` flag. Otherwise all genes found in the gene map will be used.
+  /// List of genes can be restricted using `--genes` flag. Otherwise all genes found in the genome annotation will be used.
   ///
-  /// Overrides path to `genemap.gff` provided by `--input-dataset`.
+  /// Overrides genome annotation provided by the dataset (`--input-dataset` or `--dataset-name`).
   ///
   /// Learn more about Generic Feature Format Version 3 (GFF3):
   /// https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
   ///
-  /// Supports the following compression formats: "gz", "bz2", "xz", "zstd". Use "-" to read uncompressed data from standard input (stdin).
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
   #[clap(long, short = 'm', alias = "genemap")]
   #[clap(value_hint = ValueHint::FilePath)]
-  pub input_gene_map: Option<PathBuf>,
+  pub input_annotation: Option<PathBuf>,
 
   /// Comma-separated list of names of genes to use.
   ///
   /// This defines which peptides will be written into outputs, and which genes will be taken into account during
-  /// codon-aware alignment and aminoacid mutations detection. Must only contain gene names present in the gene map. If
-  /// this flag is not supplied or its value is an empty string, then all genes found in the gene map will be used.
+  /// codon-aware alignment and aminoacid mutations detection. Must only contain gene names present in the genome annotation. If
+  /// this flag is not supplied or its value is an empty string, then all genes found in the genome annotation will be used.
   ///
   /// Requires `--input-gene-map` to be specified.
   #[clap(
@@ -387,7 +421,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// If both the `--output-all` and individual `--output-*` flags are provided, each individual flag overrides the corresponding default output path.
   ///
-  /// At least one of the output flags is required: `--output-all`, `--output-fasta`, `--output-ndjson`, `--output-json`, `--output-csv`, `--output-tsv`, `--output-tree`, `--output-translations`, `--output-insertions`, `--output-errors`
+  /// At least one of the output flags is required: `--output-all`, `--output-fasta`, `--output-ndjson`, `--output-json`, `--output-csv`, `--output-tsv`, `--output-tree`, `--output-translations`.
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'O')]
@@ -424,7 +458,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'o')]
@@ -438,7 +472,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   ///
@@ -455,7 +489,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'N')]
@@ -468,7 +502,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'J')]
@@ -483,7 +517,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'c')]
@@ -498,7 +532,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 't')]
@@ -529,7 +563,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long)]
@@ -545,7 +579,7 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long, short = 'T')]
@@ -558,67 +592,28 @@ pub struct NextcladeRunOutputArgs {
   ///
   /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
   ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
   ///
   /// If the required directory tree does not exist, it will be created.
   #[clap(long)]
   #[clap(value_hint = ValueHint::AnyPath)]
   pub output_tree_nwk: Option<PathBuf>,
 
-  /// Path to output CSV file that contain insertions stripped from the reference alignment.
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
+  /// REMOVED. The argument `--output-insertions` has been removed in favor of `--output-csv` and `--output-tsv`.
   #[clap(long, short = 'I')]
   #[clap(value_hint = ValueHint::AnyPath)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub output_insertions: Option<PathBuf>,
 
-  /// Path to output CSV file containing errors and warnings occurred during processing
-  ///
-  /// Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`.
-  ///
-  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zstd", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout).
-  ///
-  /// If the required directory tree does not exist, it will be created.
+  /// REMOVED. The argument `--output-errors` has been removed in favor of `--output-csv` and `--output-tsv`.
   #[clap(long, short = 'e')]
   #[clap(value_hint = ValueHint::AnyPath)]
+  #[clap(hide_long_help = true, hide_short_help = true)]
   pub output_errors: Option<PathBuf>,
-
-  /// Whether to include aligned reference nucleotide sequence into output nucleotide sequence FASTA file and reference peptides into output peptide FASTA files.
-  #[clap(long)]
-  pub include_reference: bool,
-
-  /// Whether to include the list of nearest nodes to the outputs
-  #[clap(long)]
-  pub include_nearest_node_info: bool,
-
-  /// Emit output sequences in-order.
-  ///
-  /// With this flag the program will wait for results from the previous sequences to be written to the output files before writing the results of the next sequences, preserving the same order as in the input file. Due to variable sequence processing times, this might introduce unnecessary waiting times, but ensures that the resulting sequences are written in the same order as they occur in the inputs (except for sequences which have errors).
-  /// By default, without this flag, processing might happen out of order, which is faster, due to the elimination of waiting, but might also lead to results written out of order - the order of results is not specified and depends on thread scheduling and processing times of individual sequences.
-  ///
-  /// This option is only relevant when `--jobs` is greater than 1 or is omitted.
-  ///
-  /// Note: the sequences which trigger errors during processing will be omitted from outputs, regardless of this flag.
-  #[clap(long)]
-  pub in_order: bool,
-
-  /// Replace unknown nucleotide characters with 'N'
-  ///
-  /// By default, the sequences containing unknown nucleotide characters are skipped with a warning - they
-  /// are not analyzed and not included into results. If this flag is provided, then before the alignment,
-  /// all unknown characters are replaced with 'N'. This replacement allows to analyze these sequences.
-  ///
-  /// The following characters are considered known:  '-', 'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M', 'N', 'R', 'S', 'T', 'V', 'W', 'Y'
-  #[clap(long)]
-  pub replace_unknown: bool,
 }
 
 #[derive(Parser, Debug, Clone)]
-pub struct NextcladeRunOtherArgs {
+pub struct NextcladeRunOtherParams {
   /// Number of processing jobs. If not specified, all available CPU threads will be used.
   #[clap(global = false, long, short = 'j', default_value_t = num_cpus::get())]
   pub jobs: usize,
@@ -626,20 +621,120 @@ pub struct NextcladeRunOtherArgs {
 
 #[derive(Parser, Debug, Clone)]
 pub struct NextcladeRunArgs {
-  #[clap(flatten, next_help_heading = "  Inputs")]
+  #[clap(flatten, next_help_heading = "Inputs")]
   pub inputs: NextcladeRunInputArgs,
 
-  #[clap(flatten, next_help_heading = "  Outputs")]
+  #[clap(flatten, next_help_heading = "Outputs")]
   pub outputs: NextcladeRunOutputArgs,
 
-  #[clap(flatten, next_help_heading = "  Phylogenetic tree parameters")]
-  pub tree_builder_params: TreeBuilderParamsOptional,
+  #[clap(flatten)]
+  pub params: NextcladeInputParamsOptional,
+
+  #[clap(flatten, next_help_heading = "Other")]
+  pub other_params: NextcladeRunOtherParams,
+}
+
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Parser, Debug)]
+#[clap(verbatim_doc_comment)]
+pub struct NextcladeSortArgs {
+  /// Path to one or multiple FASTA files with input sequences
+  ///
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". If no files provided, the plain fasta input is read from standard input (stdin).
+  ///
+  /// See: https://en.wikipedia.org/wiki/FASTA_format
+  #[clap(value_hint = ValueHint::FilePath)]
+  pub input_fastas: Vec<PathBuf>,
+
+  /// Path to input minimizer index JSON file.
+  ///
+  /// By default the latest reference minimizer index is fetched from the dataset server (default or customized with `--server` argument). If this argument is provided, the algorithm skips fetching the default index and uses the index provided in the JSON file.
+  ///
+  /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
+  #[clap(long, short = 'm')]
+  #[clap(value_hint = ValueHint::FilePath)]
+  pub input_minimizer_index_json: Option<PathBuf>,
+
+  /// Path to output directory
+  ///
+  /// Sequences will be written in subdirectories: one subdirectory per dataset. Sequences inferred to be belonging to a particular dataset will be placed in the corresponding subdirectory. The subdirectory tree can be nested, depending on how dataset names are organized.
+  ///
+  /// Mutually exclusive with `--output`.
+  ///
+  #[clap(short = 'O', long)]
+  #[clap(value_hint = ValueHint::DirPath)]
+  #[clap(group = "outputs")]
+  pub output_dir: Option<PathBuf>,
+
+  /// Template string for the file path to output sorted sequences. A separate file will be generated per dataset.
+  ///
+  /// The string should contain template variable `{name}`, where the dataset name will be substituted. Note that if the `{name}` variable contains slashes, they will be interpreted as path segments and subdirectories will be created.
+  ///
+  /// Make sure you properly quote and/or escape the curly braces, so that your shell, programming language or pipeline manager does not attempt to substitute the variables.
+  ///
+  /// Mutually exclusive with `--output-dir`.
+  ///
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. If the required directory tree does not exist, it will be created.
+  ///
+  /// Example for bash shell:
+  ///
+  ///   --output='outputs/{name}/sorted.fasta.gz'
+  #[clap(short = 'o', long)]
+  #[clap(group = "outputs")]
+  pub output_path: Option<String>,
+
+  /// Path to output results TSV file
+  ///
+  /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write uncompressed to standard output (stdout). If the required directory tree does not exist, it will be created.
+  #[clap(short = 'r', long)]
+  #[clap(value_hint = ValueHint::FilePath)]
+  pub output_results_tsv: Option<String>,
 
-  #[clap(flatten, next_help_heading = "  Alignment parameters")]
-  pub alignment_params: AlignPairwiseParamsOptional,
+  #[clap(flatten, next_help_heading = "Algorithm")]
+  pub search_params: NextcladeSeqSortParams,
 
-  #[clap(flatten, next_help_heading = "  Other")]
-  pub other: NextcladeRunOtherArgs,
+  #[clap(flatten, next_help_heading = "Other")]
+  pub other_params: NextcladeRunOtherParams,
+
+  /// Use custom dataset server.
+  ///
+  /// You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details.
+  #[clap(long)]
+  #[clap(value_hint = ValueHint::Url)]
+  #[clap(default_value_t = Url::from_str(DATA_FULL_DOMAIN).expect("Invalid URL"))]
+  pub server: Url,
+
+  #[clap(flatten)]
+  pub proxy_config: ProxyConfig,
+}
+
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Parser, Debug)]
+#[clap(verbatim_doc_comment)]
+pub struct NextcladeReadAnnotationArgs {
+  /// Genome annotation file in GFF3 format.
+  ///
+  /// Learn more about Generic Feature Format Version 3 (GFF3):
+  /// https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+  ///
+  #[clap(value_hint = ValueHint::FilePath)]
+  #[clap(display_order = 0)]
+  pub input_annotation: Option<PathBuf>,
+
+  /// Path to output JSON or YAML file.
+  ///
+  /// The format is chosen based on file extension: ".json" or ".yaml".
+  #[clap(long, short = 'o')]
+  #[clap(value_hint = ValueHint::FilePath)]
+  pub output: Option<PathBuf>,
+
+  /// Present features in "feature tree" format. This format is a precursor of genome annotation format - it contains all genetic features, even the ones that Nextclade does not use, but also less information about each feature.
+  #[clap(long)]
+  pub feature_tree: bool,
+
+  /// Print console output in JSON format, rather than human-readable table.
+  #[clap(long)]
+  pub json: bool,
 }
 
 fn generate_completions(shell: &str) -> Result<(), Report> {
@@ -676,8 +771,6 @@ pub fn nextclade_get_output_filenames(run_args: &mut NextcladeRunArgs) -> Result
         output_tsv,
         output_tree,
         output_tree_nwk,
-        output_insertions,
-        output_errors,
         ..
       },
     ..
@@ -703,14 +796,6 @@ pub fn nextclade_get_output_filenames(run_args: &mut NextcladeRunArgs) -> Result
       output_fasta.get_or_insert(add_extension(&default_output_file_path, "aligned.fasta"));
     }
 
-    if output_selection.contains(&NextcladeOutputSelection::Insertions) {
-      output_insertions.get_or_insert(add_extension(&default_output_file_path, "insertions.csv"));
-    }
-
-    if output_selection.contains(&NextcladeOutputSelection::Errors) {
-      output_errors.get_or_insert(add_extension(&default_output_file_path, "errors.csv"));
-    }
-
     if output_selection.contains(&NextcladeOutputSelection::Translations) {
       let output_translations_path =
         default_output_file_path.with_file_name(format!("{output_basename}_gene_{{gene}}"));
@@ -775,8 +860,6 @@ Example for bash shell:
     output_csv,
     output_tsv,
     output_tree,
-    output_insertions,
-    output_errors,
   ]
   .iter()
   .all(|o| o.is_none())
@@ -794,9 +877,7 @@ At least one of the following flags is required:
   --output-csv
   --output-tsv
   --output-tree
-  --output-translations
-  --output-insertions
-  --output-errors"#
+  --output-translations"#
     );
   }
 
@@ -831,15 +912,79 @@ For more information, type
 
   nextclade run --help"#;
 
+const ERROR_MSG_INPUT_QC_CONFIG_REMOVED: &str = r#"The argument `--input-qc-config` is removed in favor of `--input-pathogen-json`.
+
+Since Nextclade v3, the `pathogen.json` file is an extended version of file known as `virus_properties.json` in Nextclade v2. The Nextclade v2 files `qc.json`, `primers.csv` and `tag.json` are now merged into `pathogen.json`.
+
+For more information, type
+
+  nextclade run --help
+
+Read Nextclade documentation at:
+
+  https://docs.nextstrain.org/projects/nextclade/en/stable"#;
+
+const ERROR_MSG_INPUT_PCR_PRIMERS_REMOVED: &str = r#"The argument `--input-pcr-primers` is removed in favor of `--input-pathogen-json`.
+
+Since Nextclade v3, the `pathogen.json` file is an extended version of file known as `virus_properties.json` in Nextclade v2. The Nextclade v2 files `qc.json`, `primers.csv` and `tag.json` are now merged into `pathogen.json`.
+
+For more information, type
+
+  nextclade run --help
+
+Read Nextclade documentation at:
+
+  https://docs.nextstrain.org/projects/nextclade/en/stable"#;
+
+const ERROR_MSG_OUTPUT_INSERTIONS_REMOVED: &str = r#"The argument `--output-insertions` have been removed in favor of `--output-csv` and `--output-tsv`.
+
+In Nextclade v3 the separate arguments `--output-insertions` and `--output-errors` are removed. Please use `--output-csv` (for semicolon-separated table) and `--output-tsv` (for tab-separated table) arguments instead. These tables contain, among others, all the columns from the output insertions table (`--output-insertions`) as well as from the output errors table (`--output-errors`).
+
+For more information, type
+
+  nextclade run --help
+
+Read Nextclade documentation at:
+
+  https://docs.nextstrain.org/projects/nextclade/en/stable"#;
+
+const ERROR_MSG_OUTPUT_ERRORS_REMOVED: &str = r#"The argument `--output-errors` have been removed in favor of `--output-csv` and `--output-tsv`.
+
+In Nextclade v3 the separate arguments `--output-insertions` and `--output-errors` are removed. Please use `--output-csv` (for semicolon-separated table) and `--output-tsv` (for tab-separated table) arguments instead. These tables contain, among others, all the columns from the output insertions table (`--output-insertions`) as well as from the output errors table (`--output-errors`).
+
+For more information, type
+
+  nextclade run --help
+
+Read Nextclade documentation at:
+
+  https://docs.nextstrain.org/projects/nextclade/en/stable"#;
+
 pub fn nextclade_check_removed_args(run_args: &NextcladeRunArgs) -> Result<(), Report> {
   if run_args.inputs.input_fasta.is_some() {
     return make_error!("{ERROR_MSG_INPUT_FASTA_REMOVED}");
   }
 
+  if run_args.inputs.input_qc_config.is_some() {
+    return make_error!("{ERROR_MSG_INPUT_QC_CONFIG_REMOVED}");
+  }
+
+  if run_args.inputs.input_pcr_primers.is_some() {
+    return make_error!("{ERROR_MSG_INPUT_PCR_PRIMERS_REMOVED}");
+  }
+
   if run_args.outputs.output_dir.is_some() {
     return make_error!("{ERROR_MSG_OUTPUT_DIR_REMOVED}");
   }
 
+  if run_args.outputs.output_insertions.is_some() {
+    return make_error!("{ERROR_MSG_OUTPUT_INSERTIONS_REMOVED}");
+  }
+
+  if run_args.outputs.output_errors.is_some() {
+    return make_error!("{ERROR_MSG_OUTPUT_ERRORS_REMOVED}");
+  }
+
   Ok(())
 }
 
@@ -878,5 +1023,7 @@ pub fn nextclade_parse_cli_args() -> Result<(), Report> {
       NextcladeDatasetCommands::List(dataset_list_args) => nextclade_dataset_list(dataset_list_args),
       NextcladeDatasetCommands::Get(dataset_get_args) => nextclade_dataset_get(&dataset_get_args),
     },
+    NextcladeCommands::Sort(seq_sort_args) => nextclade_seq_sort(&seq_sort_args),
+    NextcladeCommands::ReadAnnotation(read_annotation_args) => nextclade_read_annotation(&read_annotation_args),
   }
 }
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_dataset_get.rs b/packages_rs/nextclade-cli/src/cli/nextclade_dataset_get.rs
index cf038de5a..22b9da1dd 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_dataset_get.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_dataset_get.rs
@@ -1,151 +1,120 @@
 use crate::cli::nextclade_cli::NextcladeDatasetGetArgs;
-use crate::dataset::dataset_attributes::{format_attribute_list, parse_dataset_attributes};
 use crate::dataset::dataset_download::{dataset_dir_download, dataset_zip_download, download_datasets_index_json};
-use crate::dataset::dataset_table::format_dataset_table;
 use crate::io::http_client::HttpClient;
-use eyre::{eyre, Report, WrapErr};
+use eyre::{Report, WrapErr};
 use itertools::Itertools;
-use log::{info, LevelFilter};
+use log::{warn, LevelFilter};
 use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
-use nextclade::{getenv, make_error};
-
-const THIS_VERSION: &str = getenv!("CARGO_PKG_VERSION");
+use nextclade::utils::info::{this_package_version, this_package_version_str};
+use nextclade::utils::string::find_similar_strings;
+use nextclade::{make_error, make_internal_error};
 
-pub struct DatasetHttpGetParams<'s> {
+pub struct DatasetHttpGetParams<'s> { // NOTE(review): the only consumer visible in this file (`nextclade_dataset_http_get`) is removed by this change — confirm no other users remain, then delete
   pub name: &'s str,
-  pub reference: &'s str,
   pub tag: &'s str,
 }
 
-pub fn nextclade_dataset_http_get(
-  http: &mut HttpClient,
-  DatasetHttpGetParams { name, reference, tag }: DatasetHttpGetParams,
-  attributes: &[String],
-) -> Result<Dataset, Report> {
-  let DatasetsIndexJson { datasets, .. } = download_datasets_index_json(http)?;
+pub fn nextclade_dataset_get(
+  NextcladeDatasetGetArgs {
+    name,
+    reference,
+    tag,
+    attribute,
+    server,
+    output_dir,
+    output_zip,
+    proxy_config,
+  }: &NextcladeDatasetGetArgs,
+) -> Result<(), Report> {
+  if reference.is_some() || !attribute.is_empty() { // removed arguments are still accepted by the parser so that a migration error can be shown here
+    return make_error!("The arguments `--reference` and `--attribute` are removed. Datasets are now queried by `--name` and `--tag` only.\n\nIn order to list all dataset names, type:\n\n  nextclade dataset list --only-names\n\nPlease refer to `--help` and to Nextclade documentation for more details.");
+  }
 
-  // Parse attribute key-value pairs
-  let mut attributes = parse_dataset_attributes(attributes)?;
+  let verbose = log::max_level() > LevelFilter::Info; // true only when the log level is more verbose than Info (Debug or Trace)
 
-  // Handle special attributes differently
-  let name = if let Some(attr_name) = attributes.remove("name") {
-    attr_name
-  } else {
-    name.to_owned()
-  };
+  let mut http = HttpClient::new(server, proxy_config, verbose)?;
+  let dataset = dataset_http_get(&mut http, name, tag)?; // resolves `name` (and optional `tag`) to exactly one dataset entry, or fails
 
-  if let Some(attr_reference) = attributes.remove("reference") {
-    attr_reference
+  if let Some(output_dir) = &output_dir {
+    dataset_dir_download(&mut http, &dataset, output_dir)?;
+  } else if let Some(output_zip) = &output_zip {
+    dataset_zip_download(&mut http, &dataset, output_zip)?;
   } else {
-    reference.to_owned()
-  };
+  } // intentionally empty: with neither `output_dir` nor `output_zip` set, the dataset is only looked up and nothing is downloaded
 
-  if let Some(attr_tag) = attributes.remove("tag") {
-    attr_tag
-  } else {
-    tag.to_owned()
-  };
+  Ok(())
+}
+
+pub fn dataset_http_get(http: &mut HttpClient, name: impl AsRef<str>, tag: &Option<String>) -> Result<Dataset, Report> {
+  let name = name.as_ref();
+  let tag = tag.as_ref();
 
-  let mut filtered = datasets
+  let DatasetsIndexJson { collections, .. } = download_datasets_index_json(http)?;
+
+  let datasets = collections
     .into_iter()
-    .filter(|dataset| dataset.enabled)
+    .flat_map(|collection| collection.datasets)
+    .collect_vec();
+
+  let paths = datasets.iter().map(|dataset| dataset.path.clone()).collect_vec(); // cloned up front: `datasets` is consumed by `into_iter()` below, but the paths are still needed for suggestions
+
+  let mut filtered = datasets.into_iter().filter(Dataset::is_enabled)
     .filter(|dataset| -> bool  {
-      // If a concrete version `tag` is specified, we skip 'enabled', 'compatibility' and 'latest' checks
+      // With a concrete `tag`, match it exactly; otherwise keep datasets for which `is_latest()` holds. The 'enabled' filter above always applies.
-      if tag == "latest" {
-        let is_not_old = dataset.is_latest();
-        let is_compatible = dataset.is_compatible(THIS_VERSION);
-        is_compatible && is_not_old
+      if let Some(tag) = tag.as_ref() {
+        dataset.is_tag(tag)
       } else {
-        dataset.attributes.tag.value == tag
-      }
-    })
-    // Filter by reference sequence
-    .filter(|dataset| {
-      if reference == "default" {
-        dataset.attributes.reference.is_default
-      } else {
-        dataset.attributes.reference.value == reference
+        dataset.is_latest()
       }
     })
     // Filter by name
     .filter(|dataset| {
-      dataset.attributes.name.value == name
-    })
-    // Filter by remaining attributes
-    .filter(|dataset| {
-      let mut should_include = true;
-      for (key, val) in &attributes {
-        let is_attr_matches = match dataset.attributes.rest_attrs.get(key) {
-          Some(attr) => {
-            if val == "default" {
-              attr.is_default
-            } else {
-              &attr.value == val
-            }
-          }
-          None => false
-        };
-        should_include = should_include && is_attr_matches;
-      }
-      should_include
+      dataset.path == name
     })
     .collect_vec();
 
-  let attributes_fmt = {
-    let attributes_fmt = format_attribute_list(&Some(name), reference, tag, &attributes);
-    if attributes_fmt.is_empty() {
-      "".to_owned()
-    } else {
-      format!(" having attributes: {attributes_fmt}")
+  let dataset = match &filtered.len() {
+    0 => {
+      let suggestions = find_similar_strings(paths.iter(), &name).take(10).collect_vec(); // at most 10 suggestions, drawn from all dataset paths in the index
+      let suggestions_msg = (!suggestions.is_empty())
+        .then(|| {
+          let suggestions = suggestions.iter().map(|s| format!("- {s}")).join("\n");
+          format!("\n\nDid you mean:\n{suggestions}\n?")
+        })
+        .unwrap_or_default();
+      make_error!(
+        "Dataset not found: '{name}'.{suggestions_msg}\n\nType `nextclade dataset list` to show available datasets."
+      )
     }
-  };
-
-  info!("Searching for datasets{attributes_fmt}");
-
-  match &filtered.len() {
-    0 => make_error!("No datasets found{attributes_fmt}. Use `datasets list` command to show available datasets."),
     1 => Ok(filtered.remove(0)),
     _ => {
-      let table = format_dataset_table(&filtered);
-      make_error!("Can download only a single dataset, but multiple datasets found{attributes_fmt}. Add more specific attributes to select one of them. Given current attributes, the candidates are:\n{table}")
+      make_internal_error!("Expected to find a single dataset, but multiple datasets found.")
     }
+  }?;
+
+  if !dataset.is_cli_compatible(this_package_version()) { // incompatibility is only warned about; the dataset is still returned below
+    warn!(
+      "The requested dataset '{}' with version tag '{}' is not compatible with this version of Nextclade ({}). This may cause errors and unexpected results. Please try to upgrade your Nextclade version and/or report this to dataset authors.",
+      dataset.path,
+      dataset.tag(),
+      this_package_version_str()
+    );
   }
-}
 
-pub fn nextclade_dataset_get(args: &NextcladeDatasetGetArgs) -> Result<(), Report> {
-  let verbose = log::max_level() > LevelFilter::Info;
-  let mut http = HttpClient::new(&args.server, &args.proxy_config, verbose)?;
-
-  let dataset = nextclade_dataset_http_get(
-    &mut http,
-    DatasetHttpGetParams {
-      name: &args.name,
-      reference: &args.reference,
-      tag: &args.tag,
-    },
-    &args.attribute,
-  )?;
-
-  if let Some(output_dir) = &args.output_dir {
-    dataset_dir_download(&mut http, &dataset, output_dir)?;
-  }
-
-  if let Some(output_zip) = &args.output_zip {
-    dataset_zip_download(&mut http, &dataset, output_zip)?;
-  }
-
-  Ok(())
+  Ok(dataset)
 }
 
-pub fn dataset_file_http_get(http: &mut HttpClient, dataset: &Dataset, filename: &str) -> Result<String, Report> {
-  let url = dataset
-    .files
-    .get(filename)
-    .ok_or_else(|| eyre!("File not found in the dataset: '{}'", filename))?;
+pub fn dataset_file_http_get(
+  http: &mut HttpClient,
+  dataset: &Dataset,
+  filename: impl AsRef<str>,
+) -> Result<String, Report> {
+  let filename = filename.as_ref();
+  let url = dataset.file_path(filename);
 
   let content = http
     .get(&url)
-    .wrap_err_with(|| format!("Dataset file download failed: '{url}'"))?;
+    .wrap_err_with(|| format!("when fetching dataset file '{filename}'"))?;
 
   let content_string = String::from_utf8(content)?;
 
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_dataset_list.rs b/packages_rs/nextclade-cli/src/cli/nextclade_dataset_list.rs
index 2b6976cd8..420cd6f77 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_dataset_list.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_dataset_list.rs
@@ -1,130 +1,88 @@
 use crate::cli::nextclade_cli::NextcladeDatasetListArgs;
-use crate::dataset::dataset_attributes::{format_attribute_list, parse_dataset_attributes};
 use crate::dataset::dataset_download::download_datasets_index_json;
 use crate::dataset::dataset_table::format_dataset_table;
 use crate::io::http_client::HttpClient;
 use eyre::Report;
 use itertools::Itertools;
 use log::LevelFilter;
-use nextclade::getenv;
-use nextclade::io::dataset::DatasetsIndexJson;
+use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
 use nextclade::io::json::{json_stringify, JsonPretty};
-
-const THIS_VERSION: &str = getenv!("CARGO_PKG_VERSION");
+use nextclade::make_error;
+use nextclade::utils::info::this_package_version;
 
 pub fn nextclade_dataset_list(
   NextcladeDatasetListArgs {
-    mut name,
-    mut reference,
-    mut tag,
+    name,
+    reference,
+    tag,
     attribute,
     include_incompatible,
     include_old,
+    include_deprecated,
+    include_experimental,
+    include_community,
     json,
+    only_names,
     server,
     proxy_config,
   }: NextcladeDatasetListArgs,
 ) -> Result<(), Report> {
-  let verbose = log::max_level() > LevelFilter::Info;
-  let mut http = HttpClient::new(&server, &proxy_config, verbose)?;
-  let DatasetsIndexJson { datasets, .. } = download_datasets_index_json(&mut http)?;
-
-  // Parse attribute key-value pairs
-  let mut attributes = parse_dataset_attributes(&attribute)?;
-
-  // Handle special attributes differently
-  if let Some(attr_name) = attributes.remove("name") {
-    name = Some(attr_name);
-  }
-  if let Some(attr_reference) = attributes.remove("reference") {
-    reference = attr_reference;
+  if include_old.is_some() { // removed arguments are still accepted by the parser so that a migration error can be shown here
+    return make_error!("The argument `--include-old` is removed.\n\nAll version tags are always listed now.\n\nPlease refer to `--help` and to Nextclade documentation for more details.");
   }
-  if let Some(attr_tag) = attributes.remove("tag") {
-    tag = attr_tag;
+
+  if reference.is_some() || !attribute.is_empty() {
+    return make_error!("The arguments `--reference` and `--attribute` are removed. Datasets are now queried by `--name` and `--tag` only.\n\nIn order to list all dataset names, type:\n\n  nextclade dataset list --only-names\n\nPlease refer to `--help` and to Nextclade documentation for more details.");
   }
 
-  let filtered = datasets
+  let verbose = log::max_level() > LevelFilter::Info; // true only when the log level is more verbose than Info (Debug or Trace)
+
+  let mut http = HttpClient::new(&server, &proxy_config, verbose)?;
+  let DatasetsIndexJson { collections, .. } = download_datasets_index_json(&mut http)?;
+
+  let filtered = collections
     .into_iter()
-    .filter(|dataset| dataset.enabled)
+    .flat_map(|collection| collection.datasets)
+    .filter(Dataset::is_enabled)
     .filter(|dataset| -> bool  {
-      // If a concrete version `tag` is specified, we skip 'enabled', 'compatibility' and 'latest' checks
+      // With a concrete `tag`, match it exactly and skip the compatibility/deprecated/experimental/community checks. The 'enabled' filter above always applies.
-      if tag == "latest" {
-        let is_not_old = include_old || dataset.is_latest();
-        let is_compatible = include_incompatible || dataset.is_compatible(THIS_VERSION);
-        is_compatible && is_not_old
-      } else {
-        dataset.attributes.tag.value == tag
-      }
-    })
-    // Filter by reference sequence
-    .filter(|dataset| {
-      if reference == "all" {
-        true
-      } else if reference == "default" {
-        dataset.attributes.reference.is_default
+      if let Some(tag) = tag.as_ref() {
+        dataset.is_tag(tag)
       } else {
-        dataset.attributes.reference.value == reference
+        let is_compatible = include_incompatible || dataset.is_cli_compatible(this_package_version());
+        let is_not_deprecated = include_deprecated || !dataset.is_deprecated();
+        let is_not_experimental = include_experimental || !dataset.is_experimental();
+        let is_not_community = include_community || !dataset.is_community();
+        is_compatible && is_not_deprecated && is_not_experimental && is_not_community
       }
     })
     // Filter by name
     .filter(|dataset| {
-      if let Some(name) = &name { &dataset.attributes.name.value == name } else {true}
+      if let Some(name) = &name { &dataset.path == name } else {true} // match on `path`: the same field that `only_names` prints below and that `dataset_http_get` matches
     })
-    // Filter by remaining attributes
-    .filter(|dataset| {
-      let mut should_include = true;
-      for (key, val) in &attributes {
-        let is_attr_matches = match dataset.attributes.rest_attrs.get(key) {
-          Some(attr) => {
-            if val == "default" {
-              attr.is_default
-            } else {
-              &attr.value == val
-            }
-          }
-          None => false
-        };
-        should_include = should_include && is_attr_matches;
-      }
-      should_include
-    })
-    .sorted_by_key(|dataset| (
-      !dataset.attributes.name.is_default,
-      dataset.attributes.name.value.to_ascii_lowercase(),
-      !dataset.attributes.reference.is_default,
-      dataset.attributes.reference.value.to_ascii_lowercase(),
-      !dataset.attributes.tag.is_default,
-      dataset.attributes.tag.value.to_ascii_lowercase(),
-    ))
     .collect_vec();
 
+  let names = filtered.iter().map(|dataset| &dataset.path).collect_vec(); // dataset "names" are their `path` values
+
   if json {
-    println!("{}", json_stringify(&filtered, JsonPretty(true))?);
+    let content = if only_names {
+      json_stringify(&names, JsonPretty(true))
+    } else {
+      json_stringify(&filtered, JsonPretty(true))
+    }?;
+    println!("{content}");
   } else {
     if filtered.is_empty() {
       return Ok(());
     }
 
-    let table = format_dataset_table(&filtered);
-
-    let attributes_fmt = {
-      let attributes_fmt = format_attribute_list(&name, &reference, &tag, &attributes);
-      if attributes_fmt.is_empty() {
-        "".to_owned()
-      } else {
-        format!(", having attributes: {attributes_fmt}")
-      }
+    let content = if only_names {
+      names.into_iter().join("\n")
+    } else {
+      format_dataset_table(&filtered)
     };
 
-    if !include_incompatible && !include_old {
-      println!("Showing latest dataset(s) compatible with this version of Nextclade ({THIS_VERSION}){attributes_fmt}:\n{table}");
-    } else if !include_incompatible {
-      println!("Showing latest dataset(s){attributes_fmt}:\n{table}");
-    } else if !include_old {
-      println!("Showing datasets compatible with this version of Nextclade ({THIS_VERSION}){attributes_fmt}:\n{table}");
-    } else {
-      println!("Showing all datasets{attributes_fmt}:\n{table}");
-    }
+    println!("{content}");
   }
 
   Ok(())
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs b/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
index e68188c9f..af94dac9b 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
@@ -1,200 +1,64 @@
 use crate::cli::nextclade_cli::{
-  NextcladeRunArgs, NextcladeRunInputArgs, NextcladeRunOtherArgs, NextcladeRunOutputArgs,
+  NextcladeRunArgs, NextcladeRunInputArgs, NextcladeRunOtherParams, NextcladeRunOutputArgs,
 };
 use crate::cli::nextclade_ordered_writer::NextcladeOrderedWriter;
-use crate::dataset::dataset_download::{
-  dataset_dir_load, dataset_individual_files_load, dataset_str_download_and_load, dataset_zip_load, DatasetFilesContent,
-};
+use crate::dataset::dataset_download::nextclade_get_inputs;
 use eyre::{Report, WrapErr};
-use itertools::Itertools;
 use log::info;
-use nextclade::align::gap_open::{get_gap_open_close_scores_codon_aware, get_gap_open_close_scores_flat};
-use nextclade::align::params::AlignPairwiseParams;
-use nextclade::align::seed_match2::CodonSpacedIndex;
-use nextclade::alphabet::nuc::{to_nuc_seq, to_nuc_seq_replacing, Nuc};
-use nextclade::analyze::find_aa_motifs::find_aa_motifs;
-use nextclade::analyze::phenotype::get_phenotype_attr_descs;
 use nextclade::gene::gene_map_display::gene_map_to_table_string;
-use nextclade::graph::graph::{convert_auspice_tree_to_graph, convert_graph_to_auspice_tree};
+use nextclade::graph::graph::convert_graph_to_auspice_tree;
 use nextclade::io::fasta::{FastaReader, FastaRecord};
-use nextclade::io::fs::has_extension;
 use nextclade::io::json::{json_write, JsonPretty};
 use nextclade::io::nextclade_csv::CsvColumnConfig;
 use nextclade::io::nwk_writer::nwk_write_to_file;
-use nextclade::make_error;
-use nextclade::run::nextclade_run_one::nextclade_run_one;
-use nextclade::translate::translate_genes::Translation;
-use nextclade::translate::translate_genes_ref::translate_genes_ref;
-use nextclade::tree::params::TreeBuilderParams;
+use nextclade::run::nextclade_wasm::{AnalysisInitialData, AnalysisOutput, Nextclade};
 use nextclade::tree::tree_builder::graph_attach_new_nodes_in_place;
-use nextclade::tree::tree_preprocess::graph_preprocess_in_place;
 use nextclade::types::outputs::NextcladeOutputs;
-use std::path::PathBuf;
 
 pub struct NextcladeRecord {
   pub index: usize,
   pub seq_name: String,
-  pub outputs_or_err: Result<(Vec<Nuc>, Translation, NextcladeOutputs), Report>,
-}
-
-pub struct DatasetFilePaths {
-  input_ref: PathBuf,
-  input_tree: PathBuf,
-  input_qc_config: PathBuf,
-  input_virus_properties: PathBuf,
-  input_pcr_primers: PathBuf,
-  input_gene_map: PathBuf,
-}
-
-pub fn nextclade_get_inputs(
-  run_args: &NextcladeRunArgs,
-  genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
-  if let Some(dataset_name) = run_args.inputs.dataset_name.as_ref() {
-    dataset_str_download_and_load(run_args, dataset_name, genes)
-      .wrap_err_with(|| format!("When downloading dataset '{dataset_name}'"))
-  } else if let Some(input_dataset) = run_args.inputs.input_dataset.as_ref() {
-    if input_dataset.is_file() && has_extension(input_dataset, "zip") {
-      dataset_zip_load(run_args, input_dataset, genes)
-    } else if input_dataset.is_dir() {
-      dataset_dir_load(run_args.clone(), input_dataset, genes)
-    } else {
-      make_error!(
-        "--input-dataset: path is invalid. \
-        Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'"
-      )
-    }
-  } else {
-    dataset_individual_files_load(run_args, genes)
-  }
+  pub outputs_or_err: Result<AnalysisOutput, Report>, // result of `Nextclade::run` for this sequence, or its error wrapped with the sequence index and name
 }
 
 pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
   info!("Command-line arguments:\n{run_args:#?}");
 
   let NextcladeRunArgs {
-    inputs:
-      NextcladeRunInputArgs {
-        input_fastas,
-        input_dataset,
-        input_ref,
-        input_tree,
-        input_qc_config,
-        input_virus_properties,
-        input_pcr_primers,
-        input_gene_map,
-        genes,
-        ..
-      },
+    inputs: NextcladeRunInputArgs {
+      input_fastas, genes, ..
+    },
     outputs:
       NextcladeRunOutputArgs {
-        output_all,
-        output_basename,
-        output_selection,
-        output_fasta,
-        output_translations,
-        output_ndjson,
-        output_json,
-        output_csv,
-        output_tsv,
         output_columns_selection,
+        output_graph,
         output_tree,
         output_tree_nwk,
-        output_graph,
-        output_insertions,
-        output_errors,
-        include_reference,
-        include_nearest_node_info,
-        in_order,
-        replace_unknown,
         ..
       },
-    other: NextcladeRunOtherArgs { jobs },
-    tree_builder_params,
-    alignment_params,
+    params,
+    other_params: NextcladeRunOtherParams { jobs },
   } = run_args.clone();
 
-  let DatasetFilesContent {
-    ref_record,
-    virus_properties,
-    tree,
-    ref gene_map,
-    qc_config,
-    primers,
-  } = nextclade_get_inputs(&run_args, &genes)?;
-
-  let ref_seq = &to_nuc_seq(&ref_record.seq).wrap_err("When reading reference sequence")?;
-  let seed_index = &CodonSpacedIndex::from_sequence(ref_seq);
-
-  let alignment_params = {
-    let mut alignment_params = AlignPairwiseParams::default();
-
-    // Merge alignment params coming from virus_properties into alignment_params
-    if let Some(alignment_params_from_file) = &virus_properties.alignment_params {
-      alignment_params.merge_opt(alignment_params_from_file.clone());
-    }
-
-    // Merge alignment params coming from CLI arguments
-    alignment_params.merge_opt(run_args.alignment_params);
-
-    alignment_params
-  };
-
-  let tree_builder_params = {
-    let mut tree_builder_params = TreeBuilderParams::default();
-
-    // Merge tree builder params coming from virus_properties into alignment_params
-    if let Some(tree_builder_params_from_file) = &virus_properties.tree_builder_params {
-      tree_builder_params.merge_opt(tree_builder_params_from_file.clone());
-    }
-
-    // Merge tree builder params coming from CLI arguments
-    tree_builder_params.merge_opt(run_args.tree_builder_params);
+  let inputs = nextclade_get_inputs(&run_args, &genes)?;
+  let nextclade = Nextclade::new(inputs, &params)?;
 
-    tree_builder_params
-  };
-
-  info!("Alignment parameters (final):\n{alignment_params:#?}");
-  info!("Tree builder parameters (final):\n{tree_builder_params:#?}");
-  info!("Gene map:\n{}", gene_map_to_table_string(gene_map)?);
-
-  let gap_open_close_nuc = &get_gap_open_close_scores_codon_aware(ref_seq, gene_map, &alignment_params);
-  let gap_open_close_aa = &get_gap_open_close_scores_flat(ref_seq, &alignment_params);
-
-  let ref_translation =
-    &translate_genes_ref(ref_seq, gene_map, &alignment_params).wrap_err("When translating reference genes")?;
-
-  let ref_cds_translations = ref_translation
-    .genes()
-    .flat_map(|gene| gene.cdses.values())
-    .cloned()
-    .collect_vec();
-
-  let aa_motifs_ref = &find_aa_motifs(&virus_properties.aa_motifs, ref_translation)?;
-
-  let should_keep_outputs = output_tree.is_some() || output_tree_nwk.is_some() || output_graph.is_some();
+  let should_write_tree = output_tree.is_some() || output_tree_nwk.is_some() || output_graph.is_some();
   let mut outputs = Vec::<NextcladeOutputs>::new();
 
-  let phenotype_attrs = &get_phenotype_attr_descs(&virus_properties);
-
-  let mut graph = convert_auspice_tree_to_graph(tree)?;
-  graph_preprocess_in_place(&mut graph, ref_seq, ref_translation)?;
-  let clade_node_attrs = graph.data.meta.clade_node_attr_descs();
-
-  let aa_motifs_keys = &virus_properties
-    .aa_motifs
-    .iter()
-    .map(|desc| desc.name.clone())
-    .collect_vec();
-
   let csv_column_config = CsvColumnConfig::new(&output_columns_selection)?;
 
+  info!("Parameters (final):\n{:#?}", &nextclade.params);
+  info!("Genome annotation:\n{}", gene_map_to_table_string(&nextclade.gene_map)?);
+
   std::thread::scope(|s| {
     const CHANNEL_SIZE: usize = 128;
     let (fasta_sender, fasta_receiver) = crossbeam_channel::bounded::<FastaRecord>(CHANNEL_SIZE);
     let (result_sender, result_receiver) = crossbeam_channel::bounded::<NextcladeRecord>(CHANNEL_SIZE);
 
+    let nextclade = &nextclade;
     let outputs = &mut outputs;
+    let run_args = &run_args;
 
     s.spawn(|| {
       let mut reader = FastaReader::from_paths(&input_fastas).unwrap();
@@ -215,60 +79,30 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
     for _ in 0..jobs {
       let fasta_receiver = fasta_receiver.clone();
       let result_sender = result_sender.clone();
-      let gap_open_close_nuc = &gap_open_close_nuc;
-      let gap_open_close_aa = &gap_open_close_aa;
-      let alignment_params = &alignment_params;
-      let ref_translation = &ref_translation;
-      let primers = &primers;
-      let graph = &graph;
-      let qc_config = &qc_config;
-      let virus_properties = &virus_properties;
 
       s.spawn(move || {
         let result_sender = result_sender.clone();
 
-        for FastaRecord { seq_name, seq, index } in &fasta_receiver {
-          info!("Processing sequence '{seq_name}'");
+        for fasta_record in &fasta_receiver {
+          info!("Processing sequence '{}'", fasta_record.seq_name);
 
-          let outputs_or_err = if replace_unknown {
-            Ok(to_nuc_seq_replacing(&seq))
-          } else {
-            to_nuc_seq(&seq)
-          }
-          .wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'"))
-          .and_then(|qry_seq| {
-            nextclade_run_one(
-              index,
-              &seq_name,
-              &qry_seq,
-              ref_seq,
-              seed_index,
-              ref_translation,
-              aa_motifs_ref,
-              gene_map,
-              primers,
-              graph,
-              qc_config,
-              virus_properties,
-              gap_open_close_nuc,
-              gap_open_close_aa,
-              alignment_params,
-              include_nearest_node_info,
+          let outputs_or_err = nextclade.run(&fasta_record).wrap_err_with(|| {
+            format!(
+              "When processing sequence #{} '{}'",
+              fasta_record.index, fasta_record.seq_name
             )
           });
 
-          let record = NextcladeRecord {
-            index,
-            seq_name,
-            outputs_or_err,
-          };
-
           // Important: **all** records should be sent into this channel, without skipping.
           // In in-order mode, writer that receives from this channel expects a contiguous stream of indices. Gaps in
           // the indices will cause writer to stall waiting for the missing index and the buffering queue to grow. Any
           // filtering of records should be done in the writer, instead of here.
           result_sender
-            .send(record)
+            .send(NextcladeRecord {
+              index: fasta_record.index,
+              seq_name: fasta_record.seq_name,
+              outputs_or_err,
+            })
             .wrap_err("When sending NextcladeRecord")
             .unwrap();
         }
@@ -278,36 +112,41 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
     }
 
     let writer = s.spawn(move || {
-      let mut output_writer = NextcladeOrderedWriter::new(
+      let nextclade = &nextclade;
+
+      let AnalysisInitialData {
+        genome_size,
         gene_map,
-        clade_node_attrs,
-        phenotype_attrs,
-        aa_motifs_keys,
-        &output_fasta,
-        &output_json,
-        &output_ndjson,
-        &output_csv,
-        &output_tsv,
-        &output_insertions,
-        &output_errors,
-        &output_translations,
+        clade_node_attr_key_descs,
+        phenotype_attr_descs,
+        aa_motif_keys,
+        ..
+      } = nextclade.get_initial_data();
+
+      let mut output_writer = NextcladeOrderedWriter::new(
+        &nextclade.gene_map,
+        clade_node_attr_key_descs,
+        phenotype_attr_descs,
+        aa_motif_keys,
         &csv_column_config,
-        in_order,
+        &run_args.outputs,
+        &nextclade.params,
       )
       .wrap_err("When creating output writer")
       .unwrap();
 
-      if include_reference {
+      if nextclade.params.general.include_reference {
         output_writer
-          .write_ref(&ref_record, ref_translation)
+          .write_ref(&nextclade.ref_record, &nextclade.ref_translation)
           .wrap_err("When writing output record for ref sequence")
           .unwrap();
       }
 
       for record in result_receiver {
-        if should_keep_outputs {
-          if let Ok((_, _, nextclade_outputs)) = &record.outputs_or_err {
-            outputs.push(nextclade_outputs.clone());
+        if should_write_tree {
+          // Save analysis results if they will be needed later
+          if let Ok(AnalysisOutput { analysis_result, .. }) = &record.outputs_or_err {
+            outputs.push(analysis_result.clone());
           }
         }
 
@@ -319,20 +158,25 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
     });
   });
 
-  if output_tree.is_some() || output_tree_nwk.is_some() || output_graph.is_some() {
-    graph_attach_new_nodes_in_place(&mut graph, outputs, ref_seq.len(), &tree_builder_params)?;
+  if should_write_tree {
+    let Nextclade {
+      ref_seq, params, graph, ..
+    } = nextclade;
+    if let Some(mut graph) = graph {
+      graph_attach_new_nodes_in_place(&mut graph, outputs, ref_seq.len(), &params.tree_builder)?;
 
-    if let Some(output_tree) = output_tree {
-      let tree = convert_graph_to_auspice_tree(&graph)?;
-      json_write(output_tree, &tree, JsonPretty(true))?;
-    }
+      if let Some(output_tree) = output_tree {
+        let tree = convert_graph_to_auspice_tree(&graph)?;
+        json_write(output_tree, &tree, JsonPretty(true))?;
+      }
 
-    if let Some(output_tree_nwk) = output_tree_nwk {
-      nwk_write_to_file(output_tree_nwk, &graph)?;
-    }
+      if let Some(output_tree_nwk) = output_tree_nwk {
+        nwk_write_to_file(output_tree_nwk, &graph)?;
+      }
 
-    if let Some(output_graph) = run_args.outputs.output_graph {
-      json_write(output_graph, &graph, JsonPretty(true))?;
+      if let Some(output_graph) = run_args.outputs.output_graph {
+        json_write(output_graph, &graph, JsonPretty(true))?;
+      }
     }
   }
 
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
index b1d36abc3..6a145b3d0 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
@@ -1,3 +1,4 @@
+use crate::cli::nextclade_cli::NextcladeRunOutputArgs;
 use crate::cli::nextclade_loop::NextcladeRecord;
 use eyre::{Report, WrapErr};
 use itertools::Itertools;
@@ -5,12 +6,12 @@ use log::{info, warn};
 use nextclade::alphabet::nuc::from_nuc_seq;
 use nextclade::analyze::virus_properties::PhenotypeAttrDesc;
 use nextclade::gene::gene_map::GeneMap;
-use nextclade::io::errors_csv::ErrorsCsvWriter;
 use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
-use nextclade::io::insertions_csv::InsertionsCsvWriter;
 use nextclade::io::ndjson::NdjsonFileWriter;
 use nextclade::io::nextclade_csv::{CsvColumnConfig, NextcladeResultsCsvFileWriter};
 use nextclade::io::results_json::ResultsJsonWriter;
+use nextclade::run::nextclade_wasm::AnalysisOutput;
+use nextclade::run::params::NextcladeInputParams;
 use nextclade::translate::translate_genes::Translation;
 use nextclade::tree::tree::CladeNodeAttrKeyDesc;
 use nextclade::types::outputs::NextcladeOutputs;
@@ -18,55 +19,41 @@ use nextclade::utils::error::report_to_string;
 use nextclade::utils::option::OptionMapRefFallible;
 use std::collections::HashMap;
 use std::hash::Hasher;
-use std::path::PathBuf;
 
 /// Writes output files, potentially preserving the initial order of records (same as in the inputs)
-pub struct NextcladeOrderedWriter<'a> {
+pub struct NextcladeOrderedWriter {
   fasta_writer: Option<FastaWriter>,
   fasta_peptide_writer: Option<FastaPeptideWriter>,
   output_json_writer: Option<ResultsJsonWriter>,
   output_ndjson_writer: Option<NdjsonFileWriter>,
   output_csv_writer: Option<NextcladeResultsCsvFileWriter>,
   output_tsv_writer: Option<NextcladeResultsCsvFileWriter>,
-  insertions_csv_writer: Option<InsertionsCsvWriter>,
-  errors_csv_writer: Option<ErrorsCsvWriter<'a>>,
   expected_index: usize,
   queue: HashMap<usize, NextcladeRecord>,
   in_order: bool,
 }
 
-impl<'a> NextcladeOrderedWriter<'a> {
+impl NextcladeOrderedWriter {
   pub fn new(
-    gene_map: &'a GeneMap,
+    gene_map: &GeneMap,
     clade_node_attr_key_descs: &[CladeNodeAttrKeyDesc],
     phenotype_attr_key_desc: &[PhenotypeAttrDesc],
     aa_motifs_keys: &[String],
-    output_fasta: &Option<PathBuf>,
-    output_json: &Option<PathBuf>,
-    output_ndjson: &Option<PathBuf>,
-    output_csv: &Option<PathBuf>,
-    output_tsv: &Option<PathBuf>,
-    output_insertions: &Option<PathBuf>,
-    output_errors: &Option<PathBuf>,
-    output_translations: &Option<String>,
     csv_column_config: &CsvColumnConfig,
-    in_order: bool,
+    output_params: &NextcladeRunOutputArgs,
+    params: &NextcladeInputParams,
   ) -> Result<Self, Report> {
-    let fasta_writer = output_fasta.map_ref_fallible(FastaWriter::from_path)?;
+    let fasta_writer = output_params.output_fasta.map_ref_fallible(FastaWriter::from_path)?;
 
-    let fasta_peptide_writer = output_translations
+    let fasta_peptide_writer = output_params
+      .output_translations
       .map_ref_fallible(|output_translations| FastaPeptideWriter::new(gene_map, output_translations))?;
 
-    let insertions_csv_writer = output_insertions.map_ref_fallible(InsertionsCsvWriter::new)?;
-
-    let errors_csv_writer =
-      output_errors.map_ref_fallible(|output_errors| ErrorsCsvWriter::new(gene_map, output_errors))?;
-
-    let output_json_writer = output_json.map_ref_fallible(|output_json| {
+    let output_json_writer = output_params.output_json.map_ref_fallible(|output_json| {
       ResultsJsonWriter::new(output_json, clade_node_attr_key_descs, phenotype_attr_key_desc)
     })?;
 
-    let output_ndjson_writer = output_ndjson.map_ref_fallible(NdjsonFileWriter::new)?;
+    let output_ndjson_writer = output_params.output_ndjson.map_ref_fallible(NdjsonFileWriter::new)?;
 
     let clade_node_attr_keys = clade_node_attr_key_descs
       .iter()
@@ -78,7 +65,7 @@ impl<'a> NextcladeOrderedWriter<'a> {
       .map(|desc| desc.name.clone())
       .collect_vec();
 
-    let output_csv_writer = output_csv.map_ref_fallible(|output_csv| {
+    let output_csv_writer = output_params.output_csv.map_ref_fallible(|output_csv| {
       NextcladeResultsCsvFileWriter::new(
         output_csv,
         b';',
@@ -89,7 +76,7 @@ impl<'a> NextcladeOrderedWriter<'a> {
       )
     })?;
 
-    let output_tsv_writer = output_tsv.map_ref_fallible(|output_tsv| {
+    let output_tsv_writer = output_params.output_tsv.map_ref_fallible(|output_tsv| {
       NextcladeResultsCsvFileWriter::new(
         output_tsv,
         b'\t',
@@ -107,11 +94,9 @@ impl<'a> NextcladeOrderedWriter<'a> {
       output_ndjson_writer,
       output_csv_writer,
       output_tsv_writer,
-      insertions_csv_writer,
-      errors_csv_writer,
       expected_index: 0,
       queue: HashMap::<usize, NextcladeRecord>::new(),
-      in_order,
+      in_order: params.general.in_order,
     })
   }
 
@@ -141,7 +126,11 @@ impl<'a> NextcladeOrderedWriter<'a> {
     } = record;
 
     match outputs_or_err {
-      Ok((qry_seq_stripped, translation, nextclade_outputs)) => {
+      Ok(AnalysisOutput {
+        query,
+        translation,
+        analysis_result,
+      }) => {
         let NextcladeOutputs {
           warnings,
           insertions,
@@ -149,10 +138,10 @@ impl<'a> NextcladeOrderedWriter<'a> {
           missing_genes,
           is_reverse_complement,
           ..
-        } = &nextclade_outputs;
+        } = &analysis_result;
 
         if let Some(fasta_writer) = &mut self.fasta_writer {
-          fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped), *is_reverse_complement)?;
+          fasta_writer.write(&seq_name, &from_nuc_seq(&query), *is_reverse_complement)?;
         }
 
         if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
@@ -161,32 +150,24 @@ impl<'a> NextcladeOrderedWriter<'a> {
           }
         }
 
-        if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
-          insertions_csv_writer.write(&seq_name, insertions, aa_insertions)?;
-        }
-
         for warning in warnings {
           info!("In sequence #{index} '{seq_name}': {}", warning.warning);
         }
 
-        if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
-          errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?;
-        }
-
         if let Some(output_csv_writer) = &mut self.output_csv_writer {
-          output_csv_writer.write(&nextclade_outputs)?;
+          output_csv_writer.write(&analysis_result)?;
         }
 
         if let Some(output_tsv_writer) = &mut self.output_tsv_writer {
-          output_tsv_writer.write(&nextclade_outputs)?;
+          output_tsv_writer.write(&analysis_result)?;
         }
 
         if let Some(output_ndjson_writer) = &mut self.output_ndjson_writer {
-          output_ndjson_writer.write(&nextclade_outputs)?;
+          output_ndjson_writer.write(&analysis_result)?;
         }
 
         if let Some(output_json_writer) = &mut self.output_json_writer {
-          output_json_writer.write(nextclade_outputs);
+          output_json_writer.write(analysis_result);
         }
       }
       Err(report) => {
@@ -194,12 +175,6 @@ impl<'a> NextcladeOrderedWriter<'a> {
         warn!(
           "In sequence #{index} '{seq_name}': {cause}. Note that this sequence will not be included in the results."
         );
-        if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
-          insertions_csv_writer.write(&seq_name, &[], &[])?;
-        }
-        if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
-          errors_csv_writer.write_nuc_error(&seq_name, &cause)?;
-        }
         if let Some(output_csv_writer) = &mut self.output_csv_writer {
           output_csv_writer.write_nuc_error(index, &seq_name, &cause)?;
         }
@@ -267,7 +242,7 @@ impl<'a> NextcladeOrderedWriter<'a> {
   }
 }
 
-impl<'a> Drop for NextcladeOrderedWriter<'a> {
+impl Drop for NextcladeOrderedWriter {
   fn drop(&mut self) {
     self.finish().wrap_err("When finalizing output writer").unwrap();
   }
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_read_annotation.rs b/packages_rs/nextclade-cli/src/cli/nextclade_read_annotation.rs
new file mode 100644
index 000000000..f4182801e
--- /dev/null
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_read_annotation.rs
@@ -0,0 +1,54 @@
+use crate::cli::nextclade_cli::NextcladeReadAnnotationArgs;
+use eyre::Report;
+use nextclade::features::feature_tree::FeatureTree;
+use nextclade::gene::gene_map::GeneMap;
+use nextclade::gene::gene_map_display::gene_map_to_table_string;
+use nextclade::io::file::open_file_or_stdin;
+use nextclade::io::json::{json_or_yaml_write, json_stringify, JsonPretty};
+use std::io::Read;
+
+pub fn nextclade_read_annotation(args: &NextcladeReadAnnotationArgs) -> Result<(), Report> {
+  let content = { // whole annotation input, read into one string up front
+    let mut content = String::new();
+    open_file_or_stdin(&args.input_annotation)?.read_to_string(&mut content)?;
+    content
+  };
+  // `args.feature_tree` selects the `FeatureTree` view; otherwise the input is parsed as a `GeneMap`
+  if args.feature_tree {
+    handle_feature_tree(args, &content)
+  } else {
+    handle_genome_annotation(args, &content)
+  }
+}
+
+fn handle_genome_annotation(args: &NextcladeReadAnnotationArgs, content: &str) -> Result<(), Report> {
+  let data = GeneMap::from_str(content)?;
+  // Stdout: pretty JSON when `args.json` is set, otherwise the string built by `gene_map_to_table_string`
+  if args.json {
+    println!("{}\n", json_stringify(&data, JsonPretty(true))?);
+  } else {
+    println!("{}", gene_map_to_table_string(&data)?);
+  }
+  // Regardless of the stdout format, the parsed gene map is also written to `args.output` when it is given
+  if let Some(output) = &args.output {
+    json_or_yaml_write(output, &data)?;
+  }
+
+  Ok(())
+}
+
+fn handle_feature_tree(args: &NextcladeReadAnnotationArgs, content: &str) -> Result<(), Report> {
+  let data = FeatureTree::from_gff3_str(content)?;
+  // Stdout: pretty JSON when `args.json` is set, otherwise the tree's own `to_pretty_string` rendering
+  if args.json {
+    println!("{}\n", json_stringify(&data, JsonPretty(true))?);
+  } else {
+    println!("{}", data.to_pretty_string()?);
+  }
+  // Regardless of the stdout format, the parsed feature tree is also written to `args.output` when it is given
+  if let Some(output) = &args.output {
+    json_or_yaml_write(output, &data)?;
+  }
+
+  Ok(())
+}
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs b/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs
new file mode 100644
index 000000000..3d864617e
--- /dev/null
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs
@@ -0,0 +1,410 @@
+use crate::cli::nextclade_cli::{NextcladeRunOtherParams, NextcladeSortArgs};
+use crate::dataset::dataset_download::download_datasets_index_json;
+use crate::io::http_client::HttpClient;
+use eyre::{Report, WrapErr};
+use itertools::Itertools;
+use log::{trace, LevelFilter};
+use nextclade::io::csv::CsvStructFileWriter;
+use nextclade::io::fasta::{FastaReader, FastaRecord, FastaWriter};
+use nextclade::io::fs::path_to_string;
+use nextclade::make_error;
+use nextclade::sort::minimizer_index::{MinimizerIndexJson, MINIMIZER_INDEX_ALGO_VERSION};
+use nextclade::sort::minimizer_search::{run_minimizer_search, MinimizerSearchRecord};
+use nextclade::utils::option::{OptionMapMutFallible, OptionMapRefFallible};
+use nextclade::utils::string::truncate;
+use ordered_float::OrderedFloat;
+use owo_colors::OwoColorize;
+use schemars::JsonSchema;
+use serde::Serialize;
+use std::collections::btree_map::Entry::{Occupied, Vacant};
+use std::collections::BTreeMap;
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use tinytemplate::TinyTemplate;
+
+pub fn nextclade_seq_sort(args: &NextcladeSortArgs) -> Result<(), Report> {
+  check_args(args)?;
+  // After validation: obtain the minimizer index (local file or dataset server), then run the sort with it
+  let NextcladeSortArgs {
+    server,
+    proxy_config,
+    input_minimizer_index_json,
+    ..
+  } = args;
+
+  let verbose = log::max_level() >= LevelFilter::Info;
+
+  let minimizer_index = if let Some(input_minimizer_index_json) = &input_minimizer_index_json {
+    // If a file is provided, use data from it
+    MinimizerIndexJson::from_path(input_minimizer_index_json)
+  } else {
+    // Otherwise fetch from dataset server
+    let mut http = HttpClient::new(server, proxy_config, verbose)?;
+    let index = download_datasets_index_json(&mut http)?;
+    let minimizer_index_path = index
+      .minimizer_index
+      .iter()
+      .find(|minimizer_index| MINIMIZER_INDEX_ALGO_VERSION == minimizer_index.version)
+      .map(|minimizer_index| &minimizer_index.path);
+    // Only an index whose version equals this build's `MINIMIZER_INDEX_ALGO_VERSION` was selected above
+    if let Some(minimizer_index_path) = minimizer_index_path {
+      let minimizer_index_str = http.get(minimizer_index_path)?;
+      MinimizerIndexJson::from_str(String::from_utf8(minimizer_index_str)?)
+    } else {
+      let server_versions = index
+        .minimizer_index
+        .iter()
+        .map(|minimizer_index| format!("'{}'", minimizer_index.version))
+        .join(",");
+      let server_versions = if server_versions.is_empty() {
+        "none available".to_owned()
+      } else {
+        format!(": {server_versions}")
+      };
+      // No usable index on the server: report which versions it does offer
+      make_error!("No compatible reference minimizer index data is found for this dataset server. Cannot proceed. \n\nThis version of Nextclade supports index versions up to '{}', but the server has {}.\n\nTry to upgrade Nextclade to the latest version and/or contact dataset server maintainers.", MINIMIZER_INDEX_ALGO_VERSION, server_versions)
+    }
+  }?;
+
+  run(args, &minimizer_index, verbose)
+}
+
+pub fn run(args: &NextcladeSortArgs, minimizer_index: &MinimizerIndexJson, verbose: bool) -> Result<(), Report> {
+  let NextcladeSortArgs {
+    input_fastas,
+    search_params,
+    other_params: NextcladeRunOtherParams { jobs },
+    ..
+  } = args;
+  // Pipeline: one reader thread -> `jobs` worker threads -> one writer thread, linked by bounded channels
+  std::thread::scope(|s| {
+    const CHANNEL_SIZE: usize = 128;
+    let (fasta_sender, fasta_receiver) = crossbeam_channel::bounded::<FastaRecord>(CHANNEL_SIZE);
+    let (result_sender, result_receiver) = crossbeam_channel::bounded::<MinimizerSearchRecord>(CHANNEL_SIZE);
+    // Reader: pushes FASTA records into the channel and stops at the first empty record; failures panic (unwrap)
+    s.spawn(|| {
+      let mut reader = FastaReader::from_paths(input_fastas).unwrap();
+      loop {
+        let mut record = FastaRecord::default();
+        reader.read(&mut record).unwrap();
+        if record.is_empty() {
+          break;
+        }
+        fasta_sender
+          .send(record)
+          .wrap_err("When sending a FastaRecord")
+          .unwrap();
+      }
+      drop(fasta_sender);
+    });
+    // Workers: each owns a clone of both channel ends and runs the minimizer search on incoming records
+    for _ in 0..*jobs {
+      let fasta_receiver = fasta_receiver.clone();
+      let result_sender = result_sender.clone();
+
+      s.spawn(move || {
+        // `result_sender` is moved into this closure; it is dropped explicitly once the input channel is drained
+
+        for fasta_record in &fasta_receiver {
+          trace!("Processing sequence '{}'", fasta_record.seq_name);
+
+          let result = run_minimizer_search(&fasta_record, minimizer_index, search_params)
+            .wrap_err_with(|| {
+              format!(
+                "When processing sequence #{} '{}'",
+                fasta_record.index, fasta_record.seq_name
+              )
+            })
+            .unwrap();
+
+          result_sender
+            .send(MinimizerSearchRecord { fasta_record, result })
+            .wrap_err("When sending minimizer record into the channel")
+            .unwrap();
+        }
+
+        drop(result_sender);
+      });
+    }
+    // Writer: consumes results until every sender is dropped; the scope joins all spawned threads on exit
+    s.spawn(move || {
+      writer_thread(args, result_receiver, verbose).unwrap();
+    });
+  });
+
+  Ok(())
+}
+
+#[derive(Clone, Default, Debug, Serialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+struct SeqSortCsvEntry<'a> { // one row of the results table written by `writer_thread`
+  seq_name: &'a str,
+  dataset: Option<&'a str>, // `None` when no dataset was found for the sequence
+  score: Option<f64>, // `None` together with `dataset`
+  num_hits: Option<u64>, // `None` together with `dataset`
+}
+
+fn writer_thread(
+  args: &NextcladeSortArgs,
+  result_receiver: crossbeam_channel::Receiver<MinimizerSearchRecord>,
+  verbose: bool,
+) -> Result<(), Report> {
+  let NextcladeSortArgs {
+    output_dir,
+    output_path,
+    output_results_tsv,
+    ..
+  } = args;
+  // Parse the `output_path` template once, if given; it is rendered per name in `get_filepath`
+  let template = output_path.map_ref_fallible(move |output_path| -> Result<TinyTemplate, Report> {
+    let mut template = TinyTemplate::new();
+    template
+      .add_template("output", output_path)
+      .wrap_err_with(|| format!("When parsing template: '{output_path}'"))?;
+    Ok(template)
+  })?;
+  // FASTA writers keyed by output path, created on first use by `get_or_insert_writer`
+  let mut writers = BTreeMap::new();
+  let mut stats = StatsPrinter::new(verbose);
+  // Optional tab-delimited results table
+  let mut results_csv =
+    output_results_tsv.map_ref_fallible(|output_results_tsv| CsvStructFileWriter::new(output_results_tsv, b'\t'))?;
+
+  for record in result_receiver {
+    stats.print_seq(&record);
+
+    let datasets = &record.result.datasets;
+    // A sequence without any dataset still gets one row, with `None` in the dataset columns
+    if datasets.is_empty() {
+      results_csv.map_mut_fallible(|results_csv| {
+        results_csv.write(&SeqSortCsvEntry {
+          seq_name: &record.fasta_record.seq_name,
+          dataset: None,
+          score: None,
+          num_hits: None,
+        })
+      })?;
+    }
+    // Otherwise: one row per suggested dataset
+    for dataset in datasets {
+      results_csv.map_mut_fallible(|results_csv| {
+        results_csv.write(&SeqSortCsvEntry {
+          seq_name: &record.fasta_record.seq_name,
+          dataset: Some(&dataset.name),
+          score: Some(dataset.score),
+          num_hits: Some(dataset.n_hits),
+        })
+      })?;
+    }
+    // Each dataset name also contributes all of its '/'-separated prefixes; repeated names are kept only once
+    let names = datasets
+      .iter()
+      .map(|dataset| get_all_prefix_names(&dataset.name))
+      .collect::<Result<Vec<Vec<String>>, Report>>()?
+      .into_iter()
+      .flatten()
+      .unique();
+
+    for name in names {
+      let filepath = get_filepath(&name, &template, output_dir)?;
+      // `None` means no output location applies, so no sequence file is written for this name
+      if let Some(filepath) = filepath {
+        let writer = get_or_insert_writer(&mut writers, filepath)?;
+        writer.write(&record.fasta_record.seq_name, &record.fasta_record.seq, false)?;
+      }
+    }
+  }
+
+  stats.finish();
+
+  Ok(())
+}
+
+pub fn get_all_prefix_names(name: impl AsRef<str>) -> Result<Vec<String>, Report> {
+  name
+    .as_ref()
+    .split('/') // components of the '/'-separated name
+    .scan(PathBuf::new(), |name, component| {
+      *name = name.join(component); // running prefix: first component, then first two, and so on
+      Some(name.clone())
+    })
+    .unique()
+    .map(path_to_string) // fallible conversion; the first error aborts the `collect`
+    .collect()
+}
+
+struct StatsPrinter {
+  enabled: bool, // when false, `new`, `print_seq` and `finish` print nothing
+  stats: BTreeMap<String, usize>, // per-dataset tally, keyed by dataset name
+  n_undetected: usize, // number of sequences seen with an empty dataset list
+}
+
+impl StatsPrinter {
+  pub fn new(enabled: bool) -> Self {
+    if enabled {
+      println!("Suggested datasets for each sequence");
+      println!("{}┐", "─".repeat(110));
+      println!(
+        "{:^40} │ {:^40} │ {:^10} │ {:^10} │",
+        "Sequence name", "Dataset", "Score", "Num. hits"
+      );
+      println!("{}┤", "─".repeat(110));
+    }
+
+    Self {
+      enabled,
+      stats: BTreeMap::new(),
+      n_undetected: 0,
+    }
+  }
+
+  pub fn print_seq(&mut self, record: &MinimizerSearchRecord) {
+    if !self.enabled {
+      return;
+    }
+    // Highest score first
+    let datasets = record
+      .result
+      .datasets
+      .iter()
+      .sorted_by_key(|dataset| -OrderedFloat(dataset.score))
+      .collect_vec();
+
+    print!("{:<40}", truncate(&record.fasta_record.seq_name, 40));
+
+    if datasets.is_empty() {
+      println!(" │ {:40} │ {:>10.3} │ {:>10} │", "undetected".red(), "", "");
+      self.n_undetected += 1;
+    }
+    // Tally one hit per suggested dataset; the counter starts at 0, so a first occurrence counts exactly once
+    for (i, dataset) in datasets.into_iter().enumerate() {
+      let name = &dataset.name;
+      *self.stats.entry(name.clone()).or_default() += 1;
+      // Only the first row of a sequence shows its name; continuation rows are padded instead
+      if i != 0 {
+        print!("{:<40}", "");
+      }
+
+      println!(
+        " │ {:40} │ {:>10.3} │ {:>10} │",
+        &truncate(&dataset.name, 40),
+        &dataset.score,
+        &dataset.n_hits,
+      );
+    }
+
+    println!("{}┤", "─".repeat(110));
+  }
+
+  pub fn finish(&self) {
+    if !self.enabled {
+      return;
+    }
+
+    println!("\n\nSuggested datasets");
+    println!("{}┐", "─".repeat(67));
+    println!("{:^40} │ {:^10} │ {:^10} │", "Dataset", "Num. seq", "Percent");
+    println!("{}┤", "─".repeat(67));
+    // The total sums the per-dataset tallies: a sequence suggested for several datasets counts once per dataset
+    let total_seq = self.stats.values().sum::<usize>() + self.n_undetected;
+    let stats = self
+      .stats
+      .iter()
+      .sorted_by_key(|(name, n_seq)| (-(**n_seq as isize), (*name).clone()));
+    // Rows are ordered by descending count, ties broken by dataset name
+    for (name, n_seq) in stats {
+      println!(
+        "{:<40} │ {:>10} │ {:>9.3}% │",
+        name,
+        n_seq,
+        100.0 * (*n_seq as f64 / total_seq as f64)
+      );
+    }
+
+    if self.n_undetected > 0 {
+      println!("{}┤", "─".repeat(67));
+      println!(
+        "{:<40} │ {:>10} │ {:>10} │",
+        "undetected".red(),
+        self.n_undetected.red(),
+        format!("{:>9.3}%", 100.0 * (self.n_undetected as f64 / total_seq as f64)).red()
+      );
+    }
+
+    println!("{}┤", "─".repeat(67));
+    println!(
+      "{:>40} │ {:>10} │ {:>10} │",
+      "total".bold(),
+      total_seq.bold(),
+      format!("{:>9.3}%", 100.0).bold()
+    );
+    println!("{}┘", "─".repeat(67));
+  }
+}
+
+fn get_or_insert_writer(
+  writers: &mut BTreeMap<PathBuf, FastaWriter>,
+  filepath: impl AsRef<Path>,
+) -> Result<&mut FastaWriter, Report> {
+  Ok(match writers.entry(filepath.as_ref().to_owned()) {
+    Occupied(e) => e.into_mut(), // reuse the writer already stored for this path
+    Vacant(e) => e.insert(FastaWriter::from_path(filepath)?), // first use: create the writer and cache it
+  })
+}
+
+fn get_filepath(
+  name: &str,
+  tt: &Option<TinyTemplate>,
+  output_dir: &Option<PathBuf>,
+) -> Result<Option<PathBuf>, Report> {
+  Ok(match (&tt, output_dir) {
+    (Some(tt), None) => { // template mode: render the "output" template with `name`
+      let filepath_str = tt
+        .render("output", &OutputTemplateContext { name })
+        .wrap_err("When rendering output path template")?;
+
+      Some(PathBuf::from_str(&filepath_str).wrap_err_with(|| format!("Invalid output path: '{filepath_str}'"))?)
+    }
+    (None, Some(output_dir)) => Some(output_dir.join(name).join("sequences.fasta")),
+    _ => None, // neither option set, or both (the latter is rejected by `check_args`)
+  })
+}
+
+#[derive(Serialize)]
+struct OutputTemplateContext<'a> { // context passed to `TinyTemplate::render` in `get_filepath`
+  name: &'a str, // value available to the template as `{name}`
+}
+
+fn check_args(args: &NextcladeSortArgs) -> Result<(), Report> {
+  let NextcladeSortArgs {
+    output_dir,
+    output_path: output,
+    ..
+  } = args;
+  // The path template (`output`) and `output_dir` are mutually exclusive
+  if output.is_some() && output_dir.is_some() {
+    return make_error!(
+      "The arguments `--output-dir` and `--output` cannot be used together. Remove one or the other."
+    );
+  }
+  // The template must mention `{name}`; otherwise every dataset name would render to the same path
+  if let Some(output) = output {
+    if !output.contains("{name}") {
+      return make_error!(
+        r#"
+Expected `--output` argument to contain a template string containing template variable {{name}} (with curly braces), but received:
+
+  {output}
+
+Make sure the variable is not substituted by your shell, programming language or workflow manager. Apply proper escaping as needed.
+Example for bash shell:
+
+  --output='outputs/{{name}}/sorted.fasta.gz'
+
+      "#
+      );
+    }
+  }
+
+  Ok(())
+}
diff --git a/packages_rs/nextclade-cli/src/dataset/dataset_attributes.rs b/packages_rs/nextclade-cli/src/dataset/dataset_attributes.rs
deleted file mode 100644
index 3045d2bb3..000000000
--- a/packages_rs/nextclade-cli/src/dataset/dataset_attributes.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-use eyre::{Report, WrapErr};
-use indexmap::IndexMap;
-use itertools::Itertools;
-use lazy_static::lazy_static;
-use nextclade::make_error;
-use regex::Regex;
-
-pub fn parse_dataset_attributes(attribute_strs: &[String]) -> Result<IndexMap<String, String>, Report> {
-  attribute_strs
-    .iter()
-    .map(|attr| -> Result<(String, String), Report> { parse_dataset_attribute(attr) })
-    .collect::<Result<IndexMap<String, String>, Report>>()
-}
-
-const DATASET_ATTR_REGEX: &str = r#"(['"]?(?P<key>.+)['"]?=['"]?(?P<val>.+)['"]?)"#;
-
-pub fn parse_dataset_attribute(s: &str) -> Result<(String, String), Report> {
-  lazy_static! {
-    static ref RE: Regex = Regex::new(DATASET_ATTR_REGEX)
-      .wrap_err_with(|| format!("When compiling regular expression for dataset attributes: '{DATASET_ATTR_REGEX}'"))
-      .unwrap();
-  }
-
-  if let Some(captures) = RE.captures(s) {
-    return match (captures.name("key"), captures.name("val")) {
-      (Some(key), Some(val)) => {
-        let key: String = key.as_str().to_owned();
-        let val: String = val.as_str().to_owned();
-        Ok((key, val))
-      }
-      _ => make_error!("Unable to parse dataset attribute: '{s}'"),
-    };
-  }
-  make_error!("Unable to parse dataset attribute: '{s}'")
-}
-
-pub fn format_attribute_list(
-  name: &Option<String>,
-  reference: &str,
-  tag: &str,
-  attributes: &IndexMap<String, String>,
-) -> String {
-  let mut attributes_fmt = IndexMap::<String, String>::new();
-
-  if let Some(name) = name {
-    attributes_fmt.insert("name".to_owned(), name.clone());
-  }
-  attributes_fmt.insert("reference".to_owned(), reference.to_owned());
-  attributes_fmt.insert("tag".to_owned(), tag.to_owned());
-  attributes_fmt.extend(attributes.clone().into_iter());
-
-  attributes_fmt
-    .into_iter()
-    .map(|(key, val)| format!("{key}='{val}'"))
-    .collect_vec()
-    .join(", ")
-}
diff --git a/packages_rs/nextclade-cli/src/dataset/dataset_download.rs b/packages_rs/nextclade-cli/src/dataset/dataset_download.rs
index f58b02d23..cbf88065b 100644
--- a/packages_rs/nextclade-cli/src/dataset/dataset_download.rs
+++ b/packages_rs/nextclade-cli/src/dataset/dataset_download.rs
@@ -1,68 +1,76 @@
-use crate::cli::nextclade_cli::NextcladeRunArgs;
-use crate::cli::nextclade_dataset_get::{dataset_file_http_get, nextclade_dataset_http_get, DatasetHttpGetParams};
+use crate::cli::nextclade_cli::{NextcladeRunArgs, NextcladeRunInputArgs};
+use crate::cli::nextclade_dataset_get::{dataset_file_http_get, dataset_http_get};
 use crate::io::http_client::{HttpClient, ProxyConfig};
-use eyre::{Report, WrapErr};
+use eyre::{eyre, ContextCompat, Report, WrapErr};
 use itertools::Itertools;
 use log::LevelFilter;
-use nextclade::analyze::pcr_primers::PcrPrimer;
-use nextclade::analyze::virus_properties::VirusProperties;
+use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
 use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
-use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
-use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str, FastaRecord};
-use nextclade::io::fs::absolute_path;
-use nextclade::io::json::json_parse_bytes;
-use nextclade::make_error;
-use nextclade::qc::qc_config::QcConfig;
+use nextclade::io::dataset::{Dataset, DatasetAttributeValue, DatasetAttributes, DatasetFiles, DatasetsIndexJson};
+use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
+use nextclade::io::file::create_file_or_stdout;
+use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
+use nextclade::run::nextclade_wasm::NextcladeParams;
 use nextclade::tree::tree::AuspiceTree;
-use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
-use std::fs;
+use nextclade::utils::option::OptionMapRefFallible;
+use nextclade::{make_error, make_internal_error, o};
+use rayon::iter::ParallelIterator;
+use std::collections::BTreeMap;
 use std::fs::File;
-use std::io::{BufReader, Read, Seek};
-use std::path::Path;
+use std::io::{BufReader, Read, Seek, Write};
+use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use zip::ZipArchive;
 
+const PATHOGEN_JSON: &str = "pathogen.json";
+
+pub fn nextclade_get_inputs(
+  run_args: &NextcladeRunArgs,
+  genes: &Option<Vec<String>>,
+) -> Result<NextcladeParams, Report> {
+  if let Some(dataset_name) = run_args.inputs.dataset_name.as_ref() { // 1) dataset given by name
+    dataset_str_download_and_load(run_args, genes)
+      .wrap_err_with(|| format!("When downloading dataset '{dataset_name}'"))
+  } else if let Some(input_dataset) = run_args.inputs.input_dataset.as_ref() { // 2) dataset given by path
+    if input_dataset.is_file() && has_extension(input_dataset, "zip") {
+      dataset_zip_load(run_args, input_dataset, genes)
+        .wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
+    } else if input_dataset.is_dir() {
+      dataset_dir_load(run_args, input_dataset, genes)
+        .wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
+    } else {
+      make_error!(
+        "--input-dataset: path is invalid. \
+        Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'"
+      )
+    }
+  } else { // 3) no dataset at all: fall back to `dataset_individual_files_load`
+    dataset_individual_files_load(run_args, genes)
+  }
+}
+
 #[inline]
 pub fn download_datasets_index_json(http: &mut HttpClient) -> Result<DatasetsIndexJson, Report> {
-  json_parse_bytes(http.get(&"/index_v2.json")?.as_slice())
+  let data_bytes = http.get("/index.json")?;
+  let data_str = String::from_utf8(data_bytes)?;
+  DatasetsIndexJson::from_str(data_str)
 }
 
-pub fn dataset_dir_download(http: &mut HttpClient, dataset: &Dataset, output_dir: &Path) -> Result<(), Report> {
-  let output_dir = &absolute_path(output_dir)?;
-  fs::create_dir_all(output_dir).wrap_err_with(|| format!("When creating directory '{output_dir:#?}'"))?;
-
-  dataset
-    .files
-    .par_iter()
-    .map(|(filename, url)| -> Result<(), Report> {
-      let output_file_path = output_dir.join(filename);
-      let content = http.get(url)?;
-      fs::write(output_file_path, content)?;
-      Ok(())
-    })
-    .collect::<Result<(), Report>>()
-    .wrap_err_with(|| format!("When downloading dataset {dataset:#?}"))
+pub fn dataset_zip_fetch(http: &mut HttpClient, dataset: &Dataset) -> Result<Vec<u8>, Report> {
+  http
+    .get(&dataset.file_path("dataset.zip"))
+    .wrap_err_with(|| format!("When fetching zip file for dataset '{}'", dataset.path))
 }
 
 pub fn dataset_zip_download(http: &mut HttpClient, dataset: &Dataset, output_file_path: &Path) -> Result<(), Report> {
-  if let Some(parent_dir) = output_file_path.parent() {
-    let parent_dir = &absolute_path(parent_dir)?;
-    fs::create_dir_all(parent_dir)
-      .wrap_err_with(|| format!("When creating parent directory '{parent_dir:#?}' for file '{output_file_path:#?}'"))?;
-  }
+  let mut file =
+    create_file_or_stdout(output_file_path).wrap_err_with(|| format!("When opening file {output_file_path:?}"))?;
 
-  let content = http.get(&dataset.zip_bundle)?;
-  fs::write(output_file_path, content)
-    .wrap_err_with(|| format!("When writing downloaded dataset zip file to {output_file_path:#?}"))
-}
+  let content = dataset_zip_fetch(http, dataset)?;
 
-pub struct DatasetFilesContent {
-  pub ref_record: FastaRecord,
-  pub virus_properties: VirusProperties,
-  pub tree: AuspiceTree,
-  pub gene_map: GeneMap,
-  pub qc_config: QcConfig,
-  pub primers: Vec<PcrPrimer>,
+  file
+    .write_all(&content)
+    .wrap_err_with(|| format!("When writing downloaded dataset zip file to {output_file_path:#?}"))
 }
 
 pub fn zip_read_str<R: Read + Seek>(zip: &mut ZipArchive<R>, name: &str) -> Result<String, Report> {
@@ -71,159 +79,246 @@ pub fn zip_read_str<R: Read + Seek>(zip: &mut ZipArchive<R>, name: &str) -> Resu
   Ok(s)
 }
 
+pub fn read_from_path_or_zip(
+  filepath: &Option<impl AsRef<Path>>,
+  zip: &mut ZipArchive<BufReader<File>>,
+  zip_filename: &str,
+) -> Result<Option<String>, Report> {
+  if let Some(filepath) = filepath {
+    return Ok(Some(read_file_to_string(filepath)?));
+  }
+  Ok(zip_read_str(zip, zip_filename).ok())
+}
+
 pub fn dataset_zip_load(
   run_args: &NextcladeRunArgs,
   dataset_zip: impl AsRef<Path>,
   genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
+) -> Result<NextcladeParams, Report> {
   let file = File::open(dataset_zip)?;
   let buf_file = BufReader::new(file);
   let mut zip = ZipArchive::new(buf_file)?;
 
-  let ref_record = run_args.inputs.input_ref.as_ref().map_or_else(
-    || read_one_fasta_str(&zip_read_str(&mut zip, "reference.fasta")?),
-    read_one_fasta,
-  )?;
-
-  let tree = run_args.inputs.input_tree.as_ref().map_or_else(
-    || AuspiceTree::from_str(&zip_read_str(&mut zip, "tree.json")?),
-    AuspiceTree::from_path,
-  )?;
-
-  let qc_config = run_args.inputs.input_qc_config.as_ref().map_or_else(
-    || QcConfig::from_str(&zip_read_str(&mut zip, "qc.json")?),
-    QcConfig::from_path,
-  )?;
-
-  let virus_properties = run_args.inputs.input_virus_properties.as_ref().map_or_else(
-    || VirusProperties::from_str(&zip_read_str(&mut zip, "virus_properties.json")?),
-    VirusProperties::from_path,
-  )?;
-
-  let primers = run_args.inputs.input_pcr_primers.as_ref().map_or_else(
-    || PcrPrimer::from_str(&zip_read_str(&mut zip, "primers.csv")?, &ref_record.seq),
-    |input_pcr_primers| PcrPrimer::from_path(input_pcr_primers, &ref_record.seq),
-  )?;
-
-  let gene_map = run_args.inputs.input_gene_map.as_ref().map_or_else(
-    || filter_gene_map(Some(GeneMap::from_str(zip_read_str(&mut zip, "genemap.gff")?)?), genes),
-    |input_gene_map| filter_gene_map(Some(GeneMap::from_file(input_gene_map)?), genes),
-  )?;
-
-  Ok(DatasetFilesContent {
+  let virus_properties = read_from_path_or_zip(&run_args.inputs.input_pathogen_json, &mut zip, "pathogen.json")?
+    .map_ref_fallible(VirusProperties::from_str)
+    .wrap_err("When reading pathogen JSON from dataset")?
+    .ok_or_else(|| eyre!("Pathogen JSON must always be present in the dataset but not found."))?;
+
+  let ref_record = read_from_path_or_zip(&run_args.inputs.input_ref, &mut zip, &virus_properties.files.reference)?
+    .map_ref_fallible(read_one_fasta_str)
+    .wrap_err("When reading reference sequence from dataset")?
+    .ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?;
+
+  let gene_map = read_from_path_or_zip(&run_args.inputs.input_annotation, &mut zip, "genome_annotation.gff3")?
+    .map_ref_fallible(GeneMap::from_str)
+    .wrap_err("When reading genome annotation from dataset")?
+    .map(|gene_map| filter_gene_map(gene_map, genes))
+    .unwrap_or_default();
+
+  let tree = read_from_path_or_zip(&run_args.inputs.input_tree, &mut zip, "tree.json")?
+    .map_ref_fallible(AuspiceTree::from_str)
+    .wrap_err("When reading reference tree JSON from dataset")?;
+
+  Ok(NextcladeParams {
     ref_record,
-    virus_properties,
-    tree,
     gene_map,
-    qc_config,
-    primers,
+    tree,
+    virus_properties,
   })
 }
 
-#[rustfmt::skip]
+pub fn dataset_dir_download(http: &mut HttpClient, dataset: &Dataset, output_dir: &Path) -> Result<(), Report> {
+  let mut content = dataset_zip_fetch(http, dataset)?;
+  let mut reader = std::io::Cursor::new(content.as_mut_slice());
+  let mut zip = ZipArchive::new(&mut reader)?;
+
+  ensure_dir(output_dir).wrap_err_with(|| format!("When creating directory {output_dir:#?}"))?;
+
+  zip
+    .extract(output_dir)
+    .wrap_err_with(|| format!("When extracting zip archive of dataset '{}'", dataset.path))
+}
+
 pub fn dataset_dir_load(
-  run_args: NextcladeRunArgs,
+  run_args: &NextcladeRunArgs,
   dataset_dir: impl AsRef<Path>,
   genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
-  let input_dataset = dataset_dir.as_ref();
-  dataset_load_files(DatasetFilePaths {
-    input_ref: &run_args.inputs.input_ref.unwrap_or_else(|| input_dataset.join("reference.fasta")),
-    input_tree: &run_args.inputs.input_tree.unwrap_or_else(|| input_dataset.join("tree.json")),
-    input_qc_config: &run_args.inputs.input_qc_config.unwrap_or_else(|| input_dataset.join("qc.json")),
-    input_virus_properties: &run_args.inputs.input_virus_properties.unwrap_or_else(|| input_dataset.join("virus_properties.json")),
-    input_pcr_primers: &run_args.inputs.input_pcr_primers.unwrap_or_else(|| input_dataset.join("primers.csv")),
-    input_gene_map: &run_args.inputs.input_gene_map.unwrap_or_else(|| input_dataset.join("genemap.gff")),
-  }, genes)
+) -> Result<NextcladeParams, Report> {
+  let dataset_dir = dataset_dir.as_ref();
+
+  let NextcladeRunInputArgs {
+    input_ref,
+    input_tree,
+    input_pathogen_json,
+    input_annotation,
+    ..
+  } = &run_args.inputs;
+
+  let input_pathogen_json = input_pathogen_json
+    .clone()
+    .unwrap_or_else(|| dataset_dir.join("pathogen.json"));
+
+  let virus_properties = VirusProperties::from_path(input_pathogen_json)?;
+
+  let input_ref = input_ref
+    .clone()
+    .unwrap_or_else(|| dataset_dir.join(&virus_properties.files.reference));
+  let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;
+
+  let gene_map = input_annotation
+    .clone()
+    .or_else(|| {
+      virus_properties
+        .files
+        .genome_annotation
+        .as_ref()
+        .map(|genome_annotation| dataset_dir.join(genome_annotation))
+    })
+    .map_ref_fallible(GeneMap::from_path)
+    .wrap_err("When reading genome annotation")?
+    .map(|gen_map| filter_gene_map(gen_map, genes))
+    .unwrap_or_default();
+
+  let tree = input_tree
+    .clone()
+    .or_else(|| {
+      virus_properties
+        .files
+        .tree_json
+        .as_ref()
+        .map(|tree_json| dataset_dir.join(tree_json))
+    })
+    .map_ref_fallible(AuspiceTree::from_path)
+    .wrap_err("When reading reference tree JSON")?;
+
+  Ok(NextcladeParams {
+    ref_record,
+    gene_map,
+    tree,
+    virus_properties,
+  })
 }
 
 pub fn dataset_individual_files_load(
   run_args: &NextcladeRunArgs,
   genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
-  #[rustfmt::skip]
-  let required_args = &[
-    (String::from("--input-ref"), &run_args.inputs.input_ref),
-    (String::from("--input-tree"), &run_args.inputs.input_tree),
-    (String::from("--input-gene-map"), &run_args.inputs.input_gene_map),
-    (String::from("--input-qc-config"), &run_args.inputs.input_qc_config),
-    (String::from("--input-pcr-primers"), &run_args.inputs.input_pcr_primers),
-    (String::from("--input-virus-properties"), &run_args.inputs.input_virus_properties),
-  ];
-
-  #[allow(clippy::single_match_else)]
-  match required_args {
-    #[rustfmt::skip]
-    [
-      (_, Some(input_ref)),
-      (_, Some(input_tree)),
-      (_, Some(input_gene_map)),
-      (_, Some(input_qc_config)),
-      (_, Some(input_pcr_primers)),
-      (_, Some(input_virus_properties)),
-    ] => {
-      dataset_load_files(DatasetFilePaths {
-        input_ref,
-        input_tree,
-        input_qc_config,
-        input_virus_properties,
-        input_pcr_primers,
-        input_gene_map,
-      }, genes)
-    },
-    _ => {
-      let missing_args = required_args
-        .iter()
-        .filter_map(|(key, val)| match val {
-          None => Some(key),
-          Some(_) => None,
-        })
-        .cloned()
-        .join("  \n");
-
-      make_error!("When `--input-dataset` is not specified, the following arguments are required:\n{missing_args}")
+) -> Result<NextcladeParams, Report> {
+  match (&run_args.inputs.input_dataset, &run_args.inputs.input_ref) {
+    (None, None) => make_error!("When `--input-dataset` is not specified, --input-ref is required"),
+    (_, Some(input_ref)) => {
+      let virus_properties = run_args
+        .inputs
+        .input_pathogen_json
+        .as_ref()
+        // A pathogen JSON path given explicitly must be readable: propagate errors instead of using the dummy value.
+        .map_ref_fallible(VirusProperties::from_path)
+        .wrap_err("When reading pathogen JSON")?
+        .unwrap_or_else(|| {
+          // The only case where we allow pathogen.json to be missing is when there's no dataset and files are provided
+          // explicitly through args. Let's create a dummy value to avoid making the field optional
+          VirusProperties {
+            schema_version: "".to_owned(),
+            attributes: DatasetAttributes {
+              name: DatasetAttributeValue {
+                value: "".to_owned(),
+                value_friendly: None,
+                is_default: None,
+                other: serde_json::Value::default(),
+              },
+              reference: DatasetAttributeValue {
+                value: "".to_owned(),
+                value_friendly: None,
+                is_default: None,
+                other: serde_json::Value::default(),
+              },
+              rest_attrs: BTreeMap::default(),
+              other: serde_json::Value::default(),
+            },
+            files: DatasetFiles {
+              reference: "".to_owned(),
+              pathogen_json: "".to_owned(),
+              genome_annotation: None,
+              tree_json: None,
+              examples: None,
+              readme: None,
+              changelog: None,
+              rest_files: BTreeMap::default(),
+              other: serde_json::Value::default(),
+            },
+            deprecated: false,
+            enabled: true,
+            experimental: false,
+            default_gene: None,
+            gene_order_preference: vec![],
+            mut_labels: LabelledMutationsConfig::default(),
+            primers: vec![],
+            qc: None,
+            general_params: None,
+            alignment_params: None,
+            tree_builder_params: None,
+            phenotype_data: None,
+            aa_motifs: vec![],
+            versions: vec![],
+            version: None,
+            compatibility: None,
+            other: serde_json::Value::default(),
+          }
+        });
+
+      let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;
+
+      let gene_map = run_args
+        .inputs
+        .input_annotation
+        .as_ref()
+        .map_ref_fallible(GeneMap::from_path)
+        .wrap_err("When reading genome annotation")?
+        .map(|gen_map| filter_gene_map(gen_map, genes))
+        .unwrap_or_default();
+
+      let tree = run_args
+        .inputs
+        .input_tree
+        .as_ref()
+        .map_ref_fallible(AuspiceTree::from_path)
+        .wrap_err("When reading reference tree JSON")?;
+
+      Ok(NextcladeParams {
+        ref_record,
+        gene_map,
+        tree,
+        virus_properties,
+      })
     }
+    _ => make_internal_error!("Reached unknown match arm"),
   }
 }
 
 pub struct DatasetFilePaths<'a> {
   input_ref: &'a Path,
-  input_tree: &'a Path,
-  input_qc_config: &'a Path,
-  input_virus_properties: &'a Path,
-  input_pcr_primers: &'a Path,
-  input_gene_map: &'a Path,
+  input_tree: &'a Option<PathBuf>,
+  input_pathogen_json: &'a Option<PathBuf>,
+  input_annotation: &'a Option<PathBuf>,
 }
 
-pub fn dataset_load_files(
-  DatasetFilePaths {
-    input_ref,
-    input_tree,
-    input_qc_config,
-    input_virus_properties,
-    input_pcr_primers,
-    input_gene_map,
-  }: DatasetFilePaths,
-  genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
-  let ref_record = read_one_fasta(input_ref)?;
-  let primers = PcrPrimer::from_path(input_pcr_primers, &ref_record.seq)?;
-
-  Ok(DatasetFilesContent {
-    ref_record,
-    virus_properties: VirusProperties::from_path(input_virus_properties)?,
-    gene_map: filter_gene_map(Some(GeneMap::from_file(input_gene_map)?), genes)?,
-    tree: AuspiceTree::from_path(input_tree)?,
-    qc_config: QcConfig::from_path(input_qc_config)?,
-    primers,
-  })
+pub fn read_from_path_or_url(
+  http: &mut HttpClient,
+  dataset: &Dataset,
+  filepath: &Option<impl AsRef<Path>>,
+  url: &Option<String>,
+) -> Result<Option<String>, Report> {
+  if let Some(filepath) = filepath {
+    return Ok(Some(read_file_to_string(filepath)?));
+  } else if let Some(url) = url {
+    return Ok(Some(dataset_file_http_get(http, dataset, url)?));
+  }
+  Ok(None)
 }
 
 pub fn dataset_str_download_and_load(
   run_args: &NextcladeRunArgs,
-  dataset_name: &str,
   genes: &Option<Vec<String>>,
-) -> Result<DatasetFilesContent, Report> {
+) -> Result<NextcladeParams, Report> {
   let verbose = log::max_level() > LevelFilter::Info;
   let mut http = HttpClient::new(&run_args.inputs.server, &ProxyConfig::default(), verbose)?;
 
@@ -233,66 +328,51 @@ pub fn dataset_str_download_and_load(
     .as_ref()
     .expect("Dataset name is expected, but got 'None'");
 
-  let dataset = nextclade_dataset_http_get(
-    &mut http,
-    DatasetHttpGetParams {
-      name,
-      reference: "default",
-      tag: "latest",
-    },
-    &[],
-  )?;
-
-  let ref_record = run_args.inputs.input_ref.as_ref().map_or_else(
-    || read_one_fasta_str(&dataset_file_http_get(&mut http, &dataset, "reference.fasta")?),
-    read_one_fasta,
-  )?;
-
-  let tree = run_args.inputs.input_tree.as_ref().map_or_else(
-    || AuspiceTree::from_str(&dataset_file_http_get(&mut http, &dataset, "tree.json")?),
-    AuspiceTree::from_path,
-  )?;
-
-  let qc_config = run_args.inputs.input_qc_config.as_ref().map_or_else(
-    || QcConfig::from_str(&dataset_file_http_get(&mut http, &dataset, "qc.json")?),
-    QcConfig::from_path,
-  )?;
-
-  let virus_properties = run_args.inputs.input_virus_properties.as_ref().map_or_else(
-    || VirusProperties::from_str(&dataset_file_http_get(&mut http, &dataset, "virus_properties.json")?),
-    VirusProperties::from_path,
-  )?;
-
-  let primers = run_args.inputs.input_pcr_primers.as_ref().map_or_else(
-    || {
-      PcrPrimer::from_str(
-        &dataset_file_http_get(&mut http, &dataset, "primers.csv")?,
-        &ref_record.seq,
-      )
-    },
-    |input_pcr_primers| PcrPrimer::from_path(input_pcr_primers, &ref_record.seq),
-  )?;
-
-  let gene_map = run_args.inputs.input_gene_map.as_ref().map_or_else(
-    || {
-      filter_gene_map(
-        Some(GeneMap::from_str(dataset_file_http_get(
-          &mut http,
-          &dataset,
-          "genemap.gff",
-        )?)?),
-        genes,
-      )
-    },
-    |input_gene_map| filter_gene_map(Some(GeneMap::from_file(input_gene_map)?), genes),
-  )?;
+  let dataset = dataset_http_get(&mut http, name, &None)?;
 
-  Ok(DatasetFilesContent {
+  let virus_properties = read_from_path_or_url(
+    &mut http,
+    &dataset,
+    &run_args.inputs.input_pathogen_json,
+    &Some(o!("pathogen.json")),
+  )?
+  .map_ref_fallible(VirusProperties::from_str)
+  .wrap_err("When reading pathogen JSON from dataset")?
+  .ok_or_else(|| eyre!("Required file not found in dataset: 'pathogen.json'. Please report it to dataset authors."))?;
+
+  let ref_record = read_from_path_or_url(
+    &mut http,
+    &dataset,
+    &run_args.inputs.input_ref,
+    &Some(dataset.files.reference.clone()),
+  )?
+  .map_ref_fallible(read_one_fasta_str)?
+  .wrap_err("When reading reference sequence from dataset")?;
+
+  let gene_map = read_from_path_or_url(
+    &mut http,
+    &dataset,
+    &run_args.inputs.input_annotation,
+    &dataset.files.genome_annotation,
+  )?
+  .map_ref_fallible(GeneMap::from_str)
+  .wrap_err("When reading genome annotation from dataset")?
+  .map(|gene_map| filter_gene_map(gene_map, genes))
+  .unwrap_or_default();
+
+  let tree = read_from_path_or_url(
+    &mut http,
+    &dataset,
+    &run_args.inputs.input_tree,
+    &dataset.files.tree_json,
+  )?
+  .map_ref_fallible(AuspiceTree::from_str)
+  .wrap_err("When reading reference tree from dataset")?;
+
+  Ok(NextcladeParams {
     ref_record,
-    virus_properties,
-    tree,
     gene_map,
-    qc_config,
-    primers,
+    tree,
+    virus_properties,
   })
 }
diff --git a/packages_rs/nextclade-cli/src/dataset/dataset_table.rs b/packages_rs/nextclade-cli/src/dataset/dataset_table.rs
index 7dc02191b..e1396078e 100644
--- a/packages_rs/nextclade-cli/src/dataset/dataset_table.rs
+++ b/packages_rs/nextclade-cli/src/dataset/dataset_table.rs
@@ -18,18 +18,16 @@ pub fn format_dataset_table(filtered: &[Dataset]) -> String {
     "reference".to_owned(),
     "tag".to_owned(),
     "attributes".to_owned(),
-    "comment".to_owned(),
   ]);
 
   for dataset in filtered.iter() {
     let Dataset {
-      attributes, comment, ..
+      version, attributes, ..
     } = dataset;
 
     let DatasetAttributes {
       name,
       reference,
-      tag,
       rest_attrs,
       ..
     } = &attributes;
@@ -37,7 +35,6 @@ pub fn format_dataset_table(filtered: &[Dataset]) -> String {
     let mut attrs = IndexMap::<String, &DatasetAttributeValue>::from([
       ("name".to_owned(), name),
       ("reference".to_owned(), reference),
-      ("tag".to_owned(), tag),
     ]);
 
     for (key, attr) in rest_attrs.iter() {
@@ -47,9 +44,8 @@ pub fn format_dataset_table(filtered: &[Dataset]) -> String {
     table.add_row([
       format_attr_value(name),
       format_attr_value(reference),
-      format_attr_value(tag),
+      version.tag.clone(),
       format_attributes(&attrs),
-      comment.clone(),
     ]);
   }
 
@@ -57,8 +53,8 @@ pub fn format_dataset_table(filtered: &[Dataset]) -> String {
 }
 
 pub fn format_attr_value_short(attr: &DatasetAttributeValue) -> String {
-  let DatasetAttributeValue { is_default, value, .. } = &attr;
-  if *is_default {
+  let DatasetAttributeValue { value, .. } = &attr;
+  if attr.is_default() {
     format!("{value} (*)")
   } else {
     value.clone()
diff --git a/packages_rs/nextclade-cli/src/dataset/mod.rs b/packages_rs/nextclade-cli/src/dataset/mod.rs
index 3bed03194..c96e51ebb 100644
--- a/packages_rs/nextclade-cli/src/dataset/mod.rs
+++ b/packages_rs/nextclade-cli/src/dataset/mod.rs
@@ -1,3 +1,2 @@
-pub mod dataset_attributes;
 pub mod dataset_download;
 pub mod dataset_table;
diff --git a/packages_rs/nextclade-cli/src/io/http_client.rs b/packages_rs/nextclade-cli/src/io/http_client.rs
index 15d67cdf6..ecca60d7b 100644
--- a/packages_rs/nextclade-cli/src/io/http_client.rs
+++ b/packages_rs/nextclade-cli/src/io/http_client.rs
@@ -1,9 +1,11 @@
 use clap::{Parser, ValueHint};
 use eyre::Report;
 use log::info;
-use nextclade::{getenv, make_internal_error};
+use nextclade::make_internal_error;
+use nextclade::utils::info::{this_package_name, this_package_version_str};
 use reqwest::blocking::Client;
-use reqwest::{IntoUrl, Method, Proxy};
+use reqwest::{Method, Proxy};
+use std::str::FromStr;
 use url::Url;
 
 #[derive(Parser, Debug, Default)]
@@ -32,6 +34,10 @@ pub struct HttpClient {
 
 impl HttpClient {
   pub fn new(root: &Url, proxy_conf: &ProxyConfig, verbose: bool) -> Result<Self, Report> {
+    // Make the root URL end with exactly one slash. Otherwise `Url::join()` replaces the path instead of appending.
+    // See: https://github.com/servo/rust-url/issues/333
+    let root = Url::from_str(&format!("{}/", root.as_str().trim_end_matches('/')))?;
+
     let mut client_builder = Client::builder();
 
     client_builder = if let Some(proxy_url) = &proxy_conf.proxy {
@@ -52,7 +58,7 @@ impl HttpClient {
       client_builder
     };
 
-    let user_agent = format!("{} {}", getenv!("CARGO_PKG_NAME"), getenv!("CARGO_PKG_VERSION"));
+    let user_agent = format!("{} {}", this_package_name(), this_package_version_str());
 
     let client = client_builder
       .connection_verbose(verbose)
@@ -60,40 +66,46 @@ impl HttpClient {
       .user_agent(user_agent)
       .build()?;
 
-    Ok(Self {
-      client,
-      root: root.clone(),
-    })
+    Ok(Self { client, root })
   }
 
-  pub fn get<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn get<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::GET, url)
   }
 
-  pub fn post<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn post<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::POST, url)
   }
 
-  pub fn put<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn put<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::PUT, url)
   }
 
-  pub fn patch<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn patch<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::PATCH, url)
   }
 
-  pub fn delete<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn delete<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::DELETE, url)
   }
 
-  pub fn head<U: IntoUrl + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
+  pub fn head<U: AsRef<str> + ?Sized>(&self, url: &U) -> Result<Vec<u8>, Report> {
     self.request(Method::HEAD, url)
   }
 
-  pub fn request<U: IntoUrl + ?Sized>(&self, method: Method, url: &U) -> Result<Vec<u8>, Report> {
-    let abs_url = self.root.join(url.as_str())?;
+  pub fn request<U: AsRef<str> + ?Sized>(&self, method: Method, url: &U) -> Result<Vec<u8>, Report> {
+    // Trim leading '/', otherwise Url::join() replaces the path rather than appending.
+    // See: https://github.com/servo/rust-url/issues/333
+    let url = url.as_ref().trim_start_matches('/');
+    let abs_url = self.root.join(url)?;
     info!("HTTP '{method}' request to '{abs_url}'");
-    let content = self.client.request(method, abs_url).send()?.bytes()?.to_vec();
+    let content = self
+      .client
+      .request(method, abs_url)
+      .send()?
+      .error_for_status()?
+      .bytes()?
+      .to_vec();
     Ok(content)
   }
 }
diff --git a/packages_rs/nextclade-web/Cargo.toml b/packages_rs/nextclade-web/Cargo.toml
index 13364ba34..08f8b8da2 100644
--- a/packages_rs/nextclade-web/Cargo.toml
+++ b/packages_rs/nextclade-web/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "nextclade-web"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 description = "Alignment, mutation calling, phylogenetic placement, clade assignment and quality control checks for viral genetic sequences. WebAssembly module."
 edition = "2021"
 license = "MIT"
@@ -12,6 +12,7 @@ crate-type = ["cdylib", "rlib"]
 
 [dependencies]
 assert2 = "=0.3.11"
+chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] }
 console_error_panic_hook = "=0.1.7"
 eyre = "=0.6.8"
 getrandom = { version = "=0.2.10", features = ["js"] }
@@ -19,12 +20,12 @@ itertools = "=0.11.0"
 js-sys = { version = "=0.3.64", features = [] }
 log = "=0.4.19"
 nextclade = { path = "../nextclade" }
+schemars = { version = "=0.8.12", features = ["chrono", "either", "enumset", "indexmap1"] }
 serde = { version = "=1.0.164", features = ["derive"] }
 serde-wasm-bindgen = { version = "=0.5.0" }
 wasm-bindgen = { version = "=0.2.87", features = ["serde-serialize"] }
 wasm-logger = "=0.2.0"
 web-sys = { version = "=0.3.64", features = ["console"] }
-schemars = { version = "=0.8.12", features = ["chrono", "either", "enumset", "indexmap1"] }
 
 [build-dependencies]
 nextclade = { path = "../nextclade" }
diff --git a/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts b/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts
index b040b9fd3..ef8a0b4f3 100644
--- a/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts
+++ b/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts
@@ -10,6 +10,7 @@ export function getEnvVars() {
   const DOMAIN = getDomain()
   const DOMAIN_STRIPPED = DOMAIN.replace('https://', '').replace('http://', '')
   const DATA_FULL_DOMAIN = getenv('DATA_FULL_DOMAIN')
+  const DATA_TRY_GITHUB_BRANCH = getenv('DATA_TRY_GITHUB_BRANCH')
 
   const common = {
     BABEL_ENV,
@@ -20,6 +21,7 @@ export function getEnvVars() {
     DOMAIN,
     DOMAIN_STRIPPED,
     DATA_FULL_DOMAIN,
+    DATA_TRY_GITHUB_BRANCH,
   }
 
   if (PRODUCTION) {
diff --git a/packages_rs/nextclade-web/config/next/next.config.ts b/packages_rs/nextclade-web/config/next/next.config.ts
index f38858486..22cd557fa 100644
--- a/packages_rs/nextclade-web/config/next/next.config.ts
+++ b/packages_rs/nextclade-web/config/next/next.config.ts
@@ -46,6 +46,7 @@ const {
   DOMAIN,
   DOMAIN_STRIPPED,
   DATA_FULL_DOMAIN,
+  DATA_TRY_GITHUB_BRANCH,
 } = getEnvVars()
 
 const BRANCH_NAME = getGitBranch()
@@ -61,6 +62,7 @@ const clientEnv = {
   DOMAIN,
   DOMAIN_STRIPPED,
   DATA_FULL_DOMAIN,
+  DATA_TRY_GITHUB_BRANCH,
   BLOCK_SEARCH_INDEXING: DOMAIN === RELEASE_URL ? '0' : '1',
 }
 
diff --git a/packages_rs/nextclade-web/package.json b/packages_rs/nextclade-web/package.json
index c33efea45..7c5b95673 100644
--- a/packages_rs/nextclade-web/package.json
+++ b/packages_rs/nextclade-web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@nextstrain/nextclade-web",
-  "version": "2.14.1",
+  "version": "3.0.0-alpha.0",
   "description": "Clade assignment, mutation calling, and sequence quality checks",
   "homepage": "https://clades.nextstrain.org",
   "repository": {
@@ -109,6 +109,7 @@
     "i18next": "19.3.2",
     "immutable": "4.0.0",
     "intercept-stdout": "0.1.2",
+    "is-absolute-url": "3.0.3",
     "jschardet": "3.0.0",
     "jsonexport": "3.2.0",
     "jszip": "3.9.1",
@@ -116,6 +117,7 @@
     "luxon": "2.3.2",
     "marked": "4.0.14",
     "memoize-one": "6.0.0",
+    "nanoid": "3.3.6",
     "next": "12.1.6",
     "next-compose-plugins": "2.2.1",
     "numbro": "2.3.6",
@@ -132,9 +134,10 @@
     "react-file-icon": "1.1.0",
     "react-helmet": "6.1.0",
     "react-i18next": "11.3.3",
-    "react-icons": "4.3.1",
+    "react-icons": "4.11.0",
     "react-if": "4.1.4",
     "react-loader-spinner": "5.1.4",
+    "react-markdown": "6.0.3",
     "react-no-ssr": "1.1.0",
     "react-redux": "7.2.8",
     "react-resize-detector": "7.0.0",
@@ -145,13 +148,16 @@
     "react-window": "1.8.7",
     "reactstrap": "8.10.1",
     "recharts": "2.1.9",
-    "recoil": "0.7.6",
+    "recoil": "0.7.7",
     "recoil-persist": "4.2.0",
     "redux": "4.2.0",
     "redux-saga": "1.1.3",
     "redux-thunk": "2.4.1",
     "reflect-metadata": "0.1.13",
     "regenerator-runtime": "0.13.9",
+    "rehype-raw": "5.1.0",
+    "rehype-sanitize": "4.0.0",
+    "remark-gfm": "1.0.0",
     "reselect": "4.1.5",
     "semver": "7.3.7",
     "serialize-javascript": "6.0.0",
@@ -234,13 +240,13 @@
     "allow-methods": "3.1.0",
     "babel-plugin-parameter-decorator": "1.0.16",
     "babel-plugin-transform-typescript-metadata": "0.3.2",
+    "commander": "10.0.1",
     "compression-webpack-plugin": "9.2.0",
     "connect-history-api-fallback": "1.6.0",
     "conventional-changelog-cli": "2.2.2",
     "copy-webpack-plugin": "10.2.4",
     "cross-env": "7.0.3",
     "css-loader": "6.7.1",
-    "commander": "10.0.1",
     "dotenv": "16.0.0",
     "eslint": "8.14.0",
     "eslint-config-airbnb": "19.0.4",
@@ -319,7 +325,7 @@
     "remark-autolink-headings": "6.0.1",
     "remark-breaks": "2.0.1",
     "remark-images": "2.0.0",
-    "remark-math": "3.0.1",
+    "remark-math": "4.0.0",
     "remark-slug": "6.0.0",
     "remark-toc": "7.0.0",
     "rimraf": "3.0.2",
diff --git a/packages_rs/nextclade-web/src/build.rs b/packages_rs/nextclade-web/src/build.rs
index a857750ce..6dcd7284b 100644
--- a/packages_rs/nextclade-web/src/build.rs
+++ b/packages_rs/nextclade-web/src/build.rs
@@ -1,9 +1,11 @@
 use eyre::Report;
-use nextclade::analyze::pcr_primers::PcrPrimer;
+use nextclade::analyze::pcr_primer_changes::PcrPrimer;
 use nextclade::analyze::virus_properties::{PhenotypeAttrDesc, VirusProperties};
 use nextclade::gene::gene_map::GeneMap;
-use nextclade::io::dataset::{DatasetTagJson, DatasetsIndexJson};
-use nextclade::io::errors_csv::ErrorsFromWeb;
+use nextclade::io::dataset::{
+  Dataset, DatasetAttributeValue, DatasetAttributes, DatasetCapabilities, DatasetCollectionMeta, DatasetCollectionUrl,
+  DatasetsIndexJson,
+};
 use nextclade::io::fasta::FastaRecord;
 use nextclade::io::file::create_file_or_stdout;
 use nextclade::io::fs::ensure_dir;
@@ -14,6 +16,8 @@ use nextclade::qc::qc_run::QcResult;
 use nextclade::run::nextclade_wasm::{
   AnalysisInitialData, AnalysisInput, NextcladeParams, NextcladeParamsRaw, NextcladeResult, OutputTrees,
 };
+use nextclade::sort::minimizer_index::MinimizerIndexJson;
+use nextclade::sort::minimizer_search::{MinimizerSearchRecord, MinimizerSearchResult};
 use nextclade::translate::translate_genes::Translation;
 use nextclade::tree::tree::{AuspiceTree, CladeNodeAttrKeyDesc};
 use nextclade::types::outputs::{NextcladeErrorOutputs, NextcladeOutputs};
@@ -44,7 +48,7 @@ fn write_jsonschema<T: JsonSchema>(output_file: impl AsRef<Path>) -> Result<(),
 /// it. Instead, See the actual types in the `definitions` property of JSON schema.
 #[derive(Clone, Debug, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-struct _SchemaRoot {
+struct _SchemaRoot<'a> {
   _1: GeneMap,
   _2: Translation,
   _3: AuspiceTree,
@@ -52,19 +56,26 @@ struct _SchemaRoot {
   _5: QcResult,
   _6: PcrPrimer,
   _7: NextcladeOutputs,
-  _8: DatasetsIndexJson,
   _9: CsvColumnConfig,
   _10: NextcladeErrorOutputs,
-  _11: ErrorsFromWeb,
   _12: VirusProperties,
   _13: CladeNodeAttrKeyDesc,
   _14: PhenotypeAttrDesc,
   _15: FastaRecord,
-  _16: DatasetTagJson,
-  _17: AnalysisInitialData,
+  _17: AnalysisInitialData<'a>,
   _18: AnalysisInput,
   _19: NextcladeResult,
-  _21: NextcladeParams,
-  _22: NextcladeParamsRaw,
-  _23: OutputTrees,
+  _20: NextcladeParams,
+  _21: NextcladeParamsRaw,
+  _22: OutputTrees,
+  _23: DatasetsIndexJson,
+  _24: Dataset,
+  _25: DatasetCollectionMeta,
+  _26: DatasetCapabilities,
+  _27: DatasetAttributeValue,
+  _28: DatasetAttributes,
+  _29: DatasetCollectionUrl,
+  _30: MinimizerIndexJson,
+  _31: MinimizerSearchResult,
+  _32: MinimizerSearchRecord,
 }
diff --git a/packages_rs/nextclade-web/src/components/About/About.tsx b/packages_rs/nextclade-web/src/components/About/About.tsx
deleted file mode 100644
index 7f15d6fde..000000000
--- a/packages_rs/nextclade-web/src/components/About/About.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import React from 'react'
-
-import AboutContent from './AboutContent.mdx'
-
-export function About() {
-  return <AboutContent />
-}
diff --git a/packages_rs/nextclade-web/src/components/About/AboutContent.mdx b/packages_rs/nextclade-web/src/components/About/AboutContent.mdx
deleted file mode 100644
index 9f97e267a..000000000
--- a/packages_rs/nextclade-web/src/components/About/AboutContent.mdx
+++ /dev/null
@@ -1,43 +0,0 @@
-import { CladeSchema } from 'src/components/Main/CladeSchema.tsx'
-
-## What is Nextclade?
-
-Nextclade is a tool that performs genetic sequence alignment, clade assignment, mutation calling, phylogenetic placement, and quality checks for SARS-CoV-2, Influenza (Flu), Mpox (Monkeypox), Respiratory Syncytial Virus (RSV) and other pathogens.
-
-Nextclade identifies differences between your sequences and a reference sequence, uses these differences to assign your sequences to clades, reports potential sequence quality issues in your data, and shows how the sequences are related to each other by placing them into an existing phylogenetic tree (we call it "phylogenetic placement"). You can use the tool to analyze sequences before you upload them to a database, or if you want to assign Nextstrain clades to a set of sequences.
-
-To analyze your data, drag a fasta file onto the upload box or paste sequences into the text box. These sequences will then be analyzed in your browser - data never leave your computer. Since your computer is doing the work rather than a server, it is advisable to analyze at most a few hundred sequences at a time.
-
-The Nextclade app and algorithms are opensource. The code is available on [GitHub](https://github.com/nextstrain/nextclade). The user manual is available at [docs.nextstrain.org/projects/nextclade](https://docs.nextstrain.org/projects/nextclade).
-
-
-### What are the SARS-CoV-2 clades?
-
-Nextclade was originally developed during COVID-19 pandemic, primarily focused on SARS-CoV-2. This section describes clades with application to SARS-CoV-2, but Nextclade can analyse other pathogens too.
-
-<CladeSchema />
-
-Since its emergence in late 2019, SARS-CoV-2 has diversified into several different co-circulating variants. To facilitate discussion of these variants, we have grouped them into __clades__ which are defined by specific signature mutations.
-
-We currently define more than 30 clades (see [this blog post](https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming) for details):
-
-- 19A and 19B emerged in Wuhan and have dominated the early outbreak
-- 20A emerged from 19A out of dominated the European outbreak in March and has since spread globally
-- 20B and 20C are large genetically distinct subclades 20A emerged in early 2020
-- 20D to 20J have emerged over the summer of 2020 and include three "Variants of Concern" (VoC).
-- 21A to 21F include the VoC __delta__ and several Variants of Interest (VoI).
-- 21K onwards are different clades within the diverse VoC __omicron__.
-
-Within Nextstrain, we define each clade by its combination of signature mutations. You can find the exact clade definition in [github.com/nextstrain/ncov/defaults/clades.tsv](https://github.com/nextstrain/ncov/blob/master/defaults/clades.tsv). When available, we will include [WHO labels for VoCs and VoIs](https://www.who.int/en/activities/tracking-SARS-CoV-2-variants/).
-
-Learn more about how Nextclade assigns clades in the [documentation](https://docs.nextstrain.org/projects/nextclade/en/stable/user/algorithm/).
-
-### Other pathogens
-
-Besides SARS-CoV-2, we provide Nextclade datasets to analyze the following other pathogens:
-
- * Seasonal Influenza viruses (HA and NA for A/H3N2, A/H1N1pdm, B/Vic, and B/Yam)
- * Mpox virus (the overall clade structure, as well as fine-grained lineages within the recent sustained human-to-human transmission)
- * Respiratory Syncytial Virus (RSV) (subtypes A and B)
-
-You can also put together your own dataset to analyse other pathogens.
diff --git a/packages_rs/nextclade-web/src/components/Autodetect/AutodetectPage.tsx b/packages_rs/nextclade-web/src/components/Autodetect/AutodetectPage.tsx
new file mode 100644
index 000000000..4dd2a51e6
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Autodetect/AutodetectPage.tsx
@@ -0,0 +1,249 @@
+// import classNames from 'classnames'
+// import { sortBy } from 'lodash'
+// import { mix, transparentize } from 'polished'
+// import React, { useMemo } from 'react'
+// import { Col as ColBase, Row as RowBase } from 'reactstrap'
+// import { useRecoilValue } from 'recoil'
+// import styled, { useTheme } from 'styled-components'
+// import type { MinimizerIndexJson, MinimizerSearchRecord } from 'src/types'
+// import { isEven } from 'src/helpers/number'
+// import { TableSlim } from 'src/components/Common/TableSlim'
+// import { Layout } from 'src/components/Layout/Layout'
+// import { safeZip3 } from 'src/helpers/safeZip'
+// import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+// import { autodetectResultsAtom, minimizerIndexAtom } from 'src/state/autodetect.state'
+//
+// const Container = styled.div`
+//   margin-top: 1rem;
+//   padding-bottom: 1.5rem;
+//   height: 100%;
+//   overflow: hidden;
+// `
+//
+// const Row = styled(RowBase)`
+//   overflow: hidden;
+//   height: 100%;
+// `
+//
+// const Col = styled(ColBase)`
+//   overflow: hidden;
+//   height: 100%;
+// `
+//
+// const Table = styled(TableSlim)`
+//   padding-top: 50px;
+//
+//   & thead {
+//     height: 51px;
+//     position: sticky;
+//     top: -2px;
+//     background-color: ${(props) => props.theme.gray700};
+//     color: ${(props) => props.theme.gray100};
+//   }
+//
+//   & thead th {
+//     margin: auto;
+//     text-align: center;
+//     vertical-align: middle;
+//   }
+//
+//   & td {
+//     border: none;
+//     border-left: 1px solid #ccc;
+//   }
+//
+//   & tr {
+//     border: none !important;
+//   }
+//
+//   & th {
+//     border: 1px solid #ccc;
+//   }
+// `
+//
+// const TableWrapper = styled.div`
+//   height: 100%;
+//   overflow-y: auto;
+// `
+//
+// export function AutodetectPage() {
+//   const { t } = useTranslationSafe()
+//   const minimizerIndex = useRecoilValue(minimizerIndexAtom)
+//   const autodetectResults = useRecoilValue(autodetectResultsAtom)
+//
+//   const rows = useMemo(() => {
+//     const results = sortBy(autodetectResults, (result) => result.fastaRecord.index)
+//     return results.map((res, i) => (
+//       <AutodetectTableRowSpan key={res.fastaRecord.index} order={i} res={res} minimizerIndex={minimizerIndex} />
+//     ))
+//   }, [autodetectResults, minimizerIndex])
+//
+//   return (
+//     <Layout>
+//       <Container>
+//         <Row noGutters>
+//           <Col>
+//             <TableWrapper>
+//               <Table>
+//                 <thead>
+//                   <tr>
+//                     <th>{'#'}</th>
+//                     <th>{t('Seq. name')}</th>
+//                     <th>{t('Length')}</th>
+//                     <th>{t('Total nHits')}</th>
+//                     <th>{t('Max norm. hit')}</th>
+//                     <th>{t('Dataset')}</th>
+//                     <th>{t('Ref. length')}</th>
+//                     <th>{t('Num. nHits')}</th>
+//                     <th>{t('Norm. hit')}</th>
+//                   </tr>
+//                 </thead>
+//
+//                 <tbody>{rows}</tbody>
+//               </Table>
+//             </TableWrapper>
+//           </Col>
+//         </Row>
+//       </Container>
+//     </Layout>
+//   )
+// }
+//
+// interface AutodetectTableRowSpanProps {
+//   order: number
+//   res: MinimizerSearchRecord
+//   minimizerIndex: MinimizerIndexJson
+// }
+//
+// function AutodetectTableRowSpan({ order, res, minimizerIndex }: AutodetectTableRowSpanProps) {
+//   const theme = useTheme()
+//
+//   const { datasets, maxScore, totalHits } = res.result
+//   const { seqName, index: seqIndex, seq } = res.fastaRecord
+//   const qryLen = seq.length
+//
+//   const rows = useMemo(() => {
+//     let entries = sortBy(datasets, (entry) => -entry.score)
+//
+//     let color = isEven(order) ? theme.table.rowBg.even : theme.table.rowBg.odd
+//
+//     const goodEntries = entries.filter(({ score, nHits }) => maxScore >= 0.6 && nHits >= 10 && score >= maxScore * 0.5)
+//
+//     const mediocreEntries = entries.filter(
+//       ({ score, nHits }) => maxScore >= 0.3 && nHits >= 10 && score >= maxScore * 0.5,
+//     )
+//
+//     const badEntries = entries.filter(({ score, nHits }) => maxScore >= 0.05 && nHits > 0 && score >= maxScore * 0.5)
+//
+//     if (goodEntries.length > 0) {
+//       entries = goodEntries
+//     } else if (mediocreEntries.length > 0) {
+//       entries = mediocreEntries
+//       color = mix(0.3, transparentize(0.3)(theme.warning), color)
+//     } else {
+//       entries = badEntries
+//       color = mix(0.5, transparentize(0.5)(theme.danger), color)
+//     }
+//
+//     return entries.map(({ dataset, score, nHits, refLen }, i) => {
+//       const cls = classNames(i === 0 && 'font-weight-bold')
+//
+//       return (
+//         <Tr key={dataset} $bg={color}>
+//           {i === 0 && (
+//             <>
+//               <TdIndex rowSpan={entries.length} title={seqIndex.toString()}>
+//                 {seqIndex}
+//               </TdIndex>
+//
+//               <TdName rowSpan={entries.length} title={seqName}>
+//                 {seqName}
+//               </TdName>
+//
+//               <TdNumeric rowSpan={entries.length} title={qryLen.toString()}>
+//                 {qryLen}
+//               </TdNumeric>
+//
+//               <TdNumeric rowSpan={entries.length} title={totalHits.toString()}>
+//                 {totalHits}
+//               </TdNumeric>
+//
+//               <TdNumeric rowSpan={entries.length} title={maxScore.toFixed(3)}>
+//                 {maxScore.toFixed(3)}
+//               </TdNumeric>
+//             </>
+//           )}
+//           <TdName className={cls} title={dataset}>
+//             {dataset}
+//           </TdName>
+//           <TdNumeric className={cls} title={refLen.toString()}>
+//             {refLen}
+//           </TdNumeric>
+//           <TdNumeric className={cls} title={nHits.toString()}>
+//             {nHits}
+//           </TdNumeric>
+//           <TdNumeric className={cls} title={score.toFixed(3)}>
+//             {score.toFixed(3)}
+//           </TdNumeric>
+//         </Tr>
+//       )
+//     })
+//   }, [
+//     datasets,
+//     order,
+//     theme.table.rowBg.even,
+//     theme.table.rowBg.odd,
+//     theme.warning,
+//     theme.danger,
+//     maxScore,
+//     seqIndex,
+//     seqName,
+//     qryLen,
+//     totalHits,
+//   ])
+//
+//   return (
+//     <>
+//       {rows}
+//       <TrSpacer>
+//         <td colSpan={9} />
+//       </TrSpacer>
+//     </>
+//   )
+// }
+//
+// const Tr = styled.tr<{ $bg?: string }>`
+//   background-color: ${(props) => props.$bg};
+// `
+//
+// const Td = styled.td`
+//   white-space: nowrap;
+//   overflow: hidden;
+//   text-overflow: ellipsis;
+//   max-width: 100px;
+//   font-size: 0.95rem;
+// `
+//
+// const TdName = styled(Td)`
+//   min-width: 200px;
+//   font-size: 0.9rem;
+// `
+//
+// const TdNumeric = styled(Td)`
+//   text-align: right;
+//   font-family: ${(props) => props.theme.font.monospace};
+//   font-size: 0.9rem;
+// `
+//
+// const TdIndex = styled(TdNumeric)`
+//   background-color: ${(props) => props.theme.gray700};
+//   color: ${(props) => props.theme.gray100};
+// `
+//
+// const TrSpacer = styled.tr`
+//   height: 2px;
+//
+//   & td {
+//     background-color: ${(props) => props.theme.gray400};
+//   }
+// `
diff --git a/packages_rs/nextclade-web/src/components/Citation/CitationButton.tsx b/packages_rs/nextclade-web/src/components/Citation/CitationButton.tsx
index e8083cc76..96a586f51 100644
--- a/packages_rs/nextclade-web/src/components/Citation/CitationButton.tsx
+++ b/packages_rs/nextclade-web/src/components/Citation/CitationButton.tsx
@@ -1,5 +1,4 @@
 import React, { useCallback, useState } from 'react'
-
 import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe'
 import {
   Button,
@@ -13,19 +12,33 @@ import {
   Row,
 } from 'reactstrap'
 import styled from 'styled-components'
-import { HiOutlineAcademicCap } from 'react-icons/hi'
-
-import { ButtonTransparent } from 'src/components/Common/ButtonTransparent'
 import { Citation } from 'src/components/Citation/Citation'
 
-export const ButtonCitationBase = styled(ButtonTransparent)<ButtonProps>`
-  margin: 2px 2px;
-  height: 38px;
-  width: 50px;
-  color: ${(props) => props.theme.gray700};
-
-  @media (min-width: 1200px) {
-    width: 100px;
+export const ButtonCitationBase = styled(Button)<ButtonProps>`
+  color: ${(props) => props.theme.bodyColor};
+  padding: 0;
+  background-color: transparent;
+  background-image: none;
+  border: none;
+  border-radius: 0;
+  box-shadow: none;
+  border-image: none;
+  text-decoration: none;
+  -webkit-tap-highlight-color: #ccc;
+
+  & .active,
+  & :active,
+  & :hover,
+  & :target,
+  & :focus,
+  & :focus-visible,
+  & :focus-within {
+    background-color: transparent;
+    background-image: none;
+    border: none;
+    border-radius: 0;
+    box-shadow: none;
+    border-image: none;
   }
 `
 
@@ -100,8 +113,7 @@ export function CitationButton() {
 
   return (
     <>
-      <ButtonCitationBase type="button" onClick={open} title={text}>
-        <HiOutlineAcademicCap className="mr-xl-2" />
+      <ButtonCitationBase type="button" color="link" onClick={open} title={text}>
         <span className="d-none d-xl-inline">{text}</span>
       </ButtonCitationBase>
 
diff --git a/packages_rs/nextclade-web/src/components/Common/List.tsx b/packages_rs/nextclade-web/src/components/Common/List.tsx
index e8496a60f..0ec0fdc27 100644
--- a/packages_rs/nextclade-web/src/components/Common/List.tsx
+++ b/packages_rs/nextclade-web/src/components/Common/List.tsx
@@ -1,4 +1,4 @@
-import styled from 'styled-components'
+import styled, { css } from 'styled-components'
 
 export const Ul = styled.ul`
   padding-left: 1.5rem;
@@ -13,3 +13,35 @@ export const UlInvisible = styled.ul`
 export const LiInvisible = styled.li`
   list-style: none;
 `
+
+// @formatter:off
+// prettier-ignore
+export const ScrollShadowVerticalCss = css`
+  /** Taken from: https://css-tricks.com/books/greatest-css-tricks/scroll-shadows */
+  background:
+    /* Shadow Cover TOP */    linear-gradient(white 30%, rgba(255, 255, 255, 0)) center top,
+    /* Shadow Cover BOTTOM */ linear-gradient(rgba(255, 255, 255, 0), white 70%) center bottom,
+    /* Shadow TOP */          radial-gradient(farthest-side at 50% 0, rgba(0, 0, 0, 0.2), rgba(0, 0, 0, 0)) center top,
+    /* Shadow BOTTOM */       radial-gradient(farthest-side at 50% 100%, rgba(0, 0, 0, 0.2), rgba(0, 0, 0, 0)) center bottom;
+  background-repeat: no-repeat;
+  background-size: 100% 40px, 100% 40px, 100% 14px, 100% 14px;
+  background-attachment: local, local, scroll, scroll;
+`
+// @formatter:on
+
+export const ListGenericCss = css`
+  ${ScrollShadowVerticalCss};
+  list-style: none;
+  padding: 0;
+  margin: 0;
+  /* Momentum scrolling hint for WebKit; this property only exists in its -webkit- prefixed form. */
+  -webkit-overflow-scrolling: touch;
+
+  & li {
+    border: 0;
+  }
+`
+
+export const UlGeneric = styled.ul`
+  ${ListGenericCss}
+`
diff --git a/packages_rs/nextclade-web/src/components/Common/Markdown.tsx b/packages_rs/nextclade-web/src/components/Common/Markdown.tsx
new file mode 100644
index 000000000..e9c0e22d1
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Common/Markdown.tsx
@@ -0,0 +1,45 @@
+import React, { Suspense } from 'react'
+import ReactMarkdown from 'react-markdown'
+import remarkGfm from 'remark-gfm'
+import rehypeRaw from 'rehype-raw'
+import rehypeSanitize from 'rehype-sanitize'
+import { LinkExternal } from 'src/components/Link/LinkExternal'
+import { useAxiosQuery } from 'src/helpers/useAxiosQuery'
+import { LOADING } from 'src/components/Loading/Loading'
+
+const REMARK_PLUGINS = [remarkGfm]
+
+const REHYPE_PLUGINS = [rehypeRaw, rehypeSanitize]
+
+const MD_COMPONENTS = {
+  a: LinkExternal,
+}
+
+export interface MarkdownProps {
+  content: string
+}
+
+export function Markdown({ content }: MarkdownProps) {
+  return (
+    <ReactMarkdown rehypePlugins={REHYPE_PLUGINS} remarkPlugins={REMARK_PLUGINS} components={MD_COMPONENTS}>
+      {content}
+    </ReactMarkdown>
+  )
+}
+
+export interface MarkdownRemoteProps {
+  url: string
+}
+
+function MarkdownRemoteImpl({ url }: MarkdownRemoteProps) {
+  const content = useAxiosQuery<string>(url)
+  return <Markdown content={content} />
+}
+
+export function MarkdownRemote({ url }: MarkdownRemoteProps) {
+  return (
+    <Suspense fallback={LOADING}>
+      <MarkdownRemoteImpl url={url} />
+    </Suspense>
+  )
+}
diff --git a/packages_rs/nextclade-web/src/components/Common/SearchBox.tsx b/packages_rs/nextclade-web/src/components/Common/SearchBox.tsx
new file mode 100644
index 000000000..c5b634d7a
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Common/SearchBox.tsx
@@ -0,0 +1,96 @@
+import React, { ChangeEvent, useCallback, useMemo, HTMLProps } from 'react'
+import styled from 'styled-components'
+import { Form, Input as InputBase } from 'reactstrap'
+import { MdSearch as IconSearchBase, MdClear as IconClearBase } from 'react-icons/md'
+import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import { ButtonTransparent } from 'src/components/Common/ButtonTransparent'
+
+const SearchForm = styled(Form)`
+  display: inline;
+  position: relative;
+`
+
+const IconSearchWrapper = styled.span`
+  display: inline;
+  position: absolute;
+  padding: 5px 7px;
+`
+
+const IconSearch = styled(IconSearchBase)`
+  * {
+    color: ${(props) => props.theme.gray500};
+  }
+`
+
+const ButtonClear = styled(ButtonTransparent)`
+  display: inline;
+  position: absolute;
+  right: 0;
+  padding: 0 7px;
+`
+
+const IconClear = styled(IconClearBase)`
+  * {
+    color: ${(props) => props.theme.gray500};
+  }
+`
+
+const Input = styled(InputBase)`
+  display: inline !important;
+  padding-left: 35px;
+  padding-right: 30px;
+  height: 2.2em;
+`
+
+export interface SearchBoxProps extends Omit<HTMLProps<HTMLFormElement>, 'as'> {
+  searchTitle?: string
+  searchTerm: string
+  onSearchTermChange(term: string): void
+}
+
+/** Search input with a magnifier icon and a clear button; the search term itself is owned by the caller. */
+export function SearchBox({ searchTitle, searchTerm, onSearchTermChange, ...restProps }: SearchBoxProps) {
+  const { t } = useTranslationSafe()
+
+  const onChange = useCallback(
+    (event: ChangeEvent<HTMLInputElement>) => onSearchTermChange(event.target.value),
+    [onSearchTermChange],
+  )
+
+  const onClear = useCallback(() => onSearchTermChange(''), [onSearchTermChange])
+  // Pressing Enter in the only text field would otherwise submit the form natively (a page navigation).
+  const onSubmit = useCallback((event: React.FormEvent) => event.preventDefault(), [])
+
+  const buttonClear = useMemo(() => {
+    if (searchTerm.length === 0) {
+      return null
+    }
+    return (
+      // type="button": a bare <button> inside a <form> defaults to "submit" and would submit on click.
+      <ButtonClear type="button" onClick={onClear} title={t('Clear')}>
+        <IconClear size={20} />
+      </ButtonClear>
+    )
+  }, [onClear, searchTerm.length, t])
+
+  return (
+    <SearchForm onSubmit={onSubmit} {...restProps}>
+      <IconSearchWrapper>
+        <IconSearch size={25} />
+      </IconSearchWrapper>
+      <Input
+        type="text"
+        title={searchTitle}
+        placeholder={searchTitle}
+        autoComplete="off"
+        autoCorrect="off"
+        autoCapitalize="off"
+        spellCheck="false"
+        data-gramm="false"
+        value={searchTerm}
+        onChange={onChange}
+      />
+      {buttonClear}
+    </SearchForm>
+  )
+}
diff --git a/packages_rs/nextclade-web/src/components/Common/Toggle.tsx b/packages_rs/nextclade-web/src/components/Common/Toggle.tsx
index ab6afbcb6..5f15d029a 100644
--- a/packages_rs/nextclade-web/src/components/Common/Toggle.tsx
+++ b/packages_rs/nextclade-web/src/components/Common/Toggle.tsx
@@ -6,6 +6,8 @@ import ReactToggle, { ToggleProps as ReactToggleProps } from 'react-toggle'
 import 'react-toggle/style.css'
 
 export const ToggleBase = styled(ReactToggle)<ReactToggleProps>`
+  display: block;
+
   &.react-toggle-custom {
     & > .react-toggle-track {
       background-color: #9c3434;
diff --git a/packages_rs/nextclade-web/src/components/FilePicker/FilePicker.tsx b/packages_rs/nextclade-web/src/components/FilePicker/FilePicker.tsx
index 30f2e95b9..87aaf3d68 100644
--- a/packages_rs/nextclade-web/src/components/FilePicker/FilePicker.tsx
+++ b/packages_rs/nextclade-web/src/components/FilePicker/FilePicker.tsx
@@ -16,6 +16,7 @@ import { UploadedFileInfo } from './UploadedFileInfo'
 import { UploadedFileInfoCompact } from './UploadedFileInfoCompact'
 
 export const FilePickerContainer = styled.div`
+  flex: 1;
   display: flex;
   flex-direction: column;
 `
@@ -38,7 +39,11 @@ export const FilePickerTitle = styled.h4`
   margin: auto 0;
 `
 
-export const TabsPanelStyled = styled(TabsPanel)``
+export const TabsPanelStyled = styled(TabsPanel)`
+  * {
+    background: transparent !important;
+  }
+`
 
 const TabsContentStyled = styled(TabsContent)`
   height: 100%;
@@ -106,12 +111,12 @@ export function FilePicker({
   const onPaste = useCallback(
     (content: string) => {
       if (multiple) {
-        onInputs?.([new AlgorithmInputString(content)])
+        onInputs?.([new AlgorithmInputString(content, t('Pasted sequences'))])
       } else {
-        onInput?.(new AlgorithmInputString(content))
+        onInput?.(new AlgorithmInputString(content, t('Pasted sequences')))
       }
     },
-    [multiple, onInput, onInputs],
+    [multiple, onInput, onInputs, t],
   )
 
   // eslint-disable-next-line no-void
diff --git a/packages_rs/nextclade-web/src/components/FilePicker/FilePickerAdvanced.tsx b/packages_rs/nextclade-web/src/components/FilePicker/FilePickerAdvanced.tsx
index d77cae09a..0eac95b9f 100644
--- a/packages_rs/nextclade-web/src/components/FilePicker/FilePickerAdvanced.tsx
+++ b/packages_rs/nextclade-web/src/components/FilePicker/FilePickerAdvanced.tsx
@@ -4,20 +4,8 @@ import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslation
 import { Col, Row } from 'reactstrap'
 import { useRecoilState, useRecoilValue, useResetRecoilState } from 'recoil'
 
-import {
-  geneMapErrorAtom,
-  qcConfigErrorAtom,
-  refSeqErrorAtom,
-  refTreeErrorAtom,
-  virusPropertiesErrorAtom,
-} from 'src/state/error.state'
-import {
-  geneMapInputAtom,
-  qcConfigInputAtom,
-  refSeqInputAtom,
-  refTreeInputAtom,
-  virusPropertiesInputAtom,
-} from 'src/state/inputs.state'
+import { geneMapErrorAtom, refSeqErrorAtom, refTreeErrorAtom, virusPropertiesErrorAtom } from 'src/state/error.state'
+import { geneMapInputAtom, refSeqInputAtom, refTreeInputAtom, virusPropertiesInputAtom } from 'src/state/inputs.state'
 
 import { FileIconFasta, FileIconGff, FileIconJson } from 'src/components/Common/FileIcons'
 import { FilePicker } from 'src/components/FilePicker/FilePicker'
@@ -37,10 +25,6 @@ export function FilePickerAdvanced() {
   const refTreeError = useRecoilValue(refTreeErrorAtom)
   const resetRefTree = useResetRecoilState(refTreeInputAtom)
 
-  const [qcConfig, setQcConfig] = useRecoilState(qcConfigInputAtom)
-  const qcConfigError = useRecoilValue(qcConfigErrorAtom)
-  const resetQcConfig = useResetRecoilState(qcConfigInputAtom)
-
   const [virusProperties, setVirusProperties] = useRecoilState(virusPropertiesInputAtom)
   const virusPropertiesError = useRecoilValue(virusPropertiesErrorAtom)
   const resetVirusProperties = useResetRecoilState(virusPropertiesInputAtom)
@@ -78,19 +62,6 @@ export function FilePickerAdvanced() {
           onInput={setRefSeq}
         />
 
-        <FilePicker
-          className="my-3"
-          compact
-          icon={iconJson}
-          title={t('Quality control')}
-          exampleUrl="https://example.com/qc.json"
-          pasteInstructions={t('Enter QC config in JSON format')}
-          input={qcConfig}
-          error={qcConfigError}
-          onRemove={resetQcConfig}
-          onInput={setQcConfig}
-        />
-
         <FilePicker
           className="my-3"
           compact
diff --git a/packages_rs/nextclade-web/src/components/FilePicker/UploadBox.tsx b/packages_rs/nextclade-web/src/components/FilePicker/UploadBox.tsx
index 24d1995e6..42a711b95 100644
--- a/packages_rs/nextclade-web/src/components/FilePicker/UploadBox.tsx
+++ b/packages_rs/nextclade-web/src/components/FilePicker/UploadBox.tsx
@@ -56,6 +56,7 @@ export const UploadZoneTextContainer = styled.div`
 export const UploadZoneText = styled.div`
   font-size: 1.1rem;
   text-align: center;
+  max-width: 150px;
 `
 
 export const UploadZoneButton = styled(Button)`
@@ -83,7 +84,7 @@ export function UploadBox({ onUpload, children, multiple = false, ...props }: Pr
   const normal = useMemo(
     () => (
       <UploadZoneTextContainer>
-        <UploadZoneText>{t('Drag & drop files')}</UploadZoneText>
+        <UploadZoneText>{t('Drag & drop files or folders')}</UploadZoneText>
         <UploadZoneButton color="primary">{t('Select files')}</UploadZoneButton>
       </UploadZoneTextContainer>
     ),
diff --git a/packages_rs/nextclade-web/src/components/Layout/Footer.tsx b/packages_rs/nextclade-web/src/components/Layout/Footer.tsx
index b7b2bdfd6..42dbdc094 100644
--- a/packages_rs/nextclade-web/src/components/Layout/Footer.tsx
+++ b/packages_rs/nextclade-web/src/components/Layout/Footer.tsx
@@ -1,29 +1,27 @@
 import React from 'react'
-
 import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe'
-import { Col, Container, Row } from 'reactstrap'
+import { Col, Row } from 'reactstrap'
 import styled from 'styled-components'
-
 import { PROJECT_NAME, COMPANY_NAME } from 'src/constants'
 import { getCopyrightYearRange } from 'src/helpers/getCopyrightYearRange'
 import { LinkExternal } from 'src/components/Link/LinkExternal'
 import { getVersionString } from 'src/helpers/getVersionString'
-
 import LogoBedfordlab from 'src/assets/img/bedfordlab.svg'
 import LogoBiozentrum from 'src/assets/img/biozentrum_square.svg'
 import LogoSib from 'src/assets/img/sib.logo.svg'
 import LogoFredHutch from 'src/assets/img/fred_hutch.svg'
 import LogoNeherlab from 'src/assets/img/neherlab.svg'
-// impoas from from 'src/assets/img/nextstrain_logo.svg'
-// impoas from from 'src/assets/img/unibas.svg'
 import LogoVercel from 'src/assets/img/powered-by-vercel.svg'
 
-const FooterContainer = styled(Container)`
-  background-color: #2a2a2a;
-  color: #c4cdd5;
+const Container = styled.footer`
+  height: 38px;
+  width: 100%;
+  bottom: 0;
   padding: 6px 10px;
-  border-top-left-radius: 3px;
-  border-top-right-radius: 3px;
+  box-shadow: ${(props) => props.theme.shadows.large};
+  z-index: 1000;
+  background-color: ${(props) => props.theme.white};
+  opacity: 1;
 `
 
 const CopyrightText = styled.div`
@@ -76,12 +74,12 @@ const VersionText = styled.div`
   }
 `
 
-export default function Footer() {
+export function Footer() {
   const { t } = useTranslation()
   const copyrightYearRange = getCopyrightYearRange()
 
   return (
-    <FooterContainer fluid tag="footer">
+    <Container>
       <Row noGutters>
         <Col className="d-flex">
           <CopyrightText className="mr-auto my-auto">
@@ -121,6 +119,6 @@ export default function Footer() {
           <VersionText className="ml-auto my-auto">{getVersionString()}</VersionText>
         </Col>
       </Row>
-    </FooterContainer>
+    </Container>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Layout/LanguageSwitcher.tsx b/packages_rs/nextclade-web/src/components/Layout/LanguageSwitcher.tsx
index d0455bf72..a788f45ec 100644
--- a/packages_rs/nextclade-web/src/components/Layout/LanguageSwitcher.tsx
+++ b/packages_rs/nextclade-web/src/components/Layout/LanguageSwitcher.tsx
@@ -1,7 +1,13 @@
 import React, { useCallback, useMemo, useState } from 'react'
-import { Dropdown, DropdownToggle, DropdownMenu, DropdownItem, DropdownProps } from 'reactstrap'
+import {
+  Dropdown as DropdownBase,
+  DropdownToggle as DropdownToggleBase,
+  DropdownMenu as DropdownMenuBase,
+  DropdownItem,
+  DropdownProps,
+} from 'reactstrap'
 import { useRecoilState } from 'recoil'
-
+import styled from 'styled-components'
 import { localeAtom } from 'src/state/locale.state'
 import { getLocaleWithKey, Locale, localesArray } from 'src/i18n/i18n'
 
@@ -14,11 +20,11 @@ export function LanguageSwitcher({ ...restProps }: LanguageSwitcherProps) {
   const setLocaleLocal = useCallback((locale: Locale) => () => setCurrentLocale(locale.key), [setCurrentLocale])
 
   return (
-    <Dropdown className="language-switcher" isOpen={dropdownOpen} toggle={toggle} {...restProps}>
+    <Dropdown isOpen={dropdownOpen} toggle={toggle} {...restProps}>
       <DropdownToggle nav caret>
-        <LanguageSwitcherItem locale={currentLocale} />
+        <LabelShort locale={currentLocale} />
       </DropdownToggle>
-      <DropdownMenu className="language-switcher-menu" positionFixed>
+      <DropdownMenu positionFixed>
         {localesArray.map((locale) => {
           const isCurrent = locale.key === currentLocale
           return (
@@ -33,20 +39,42 @@ export function LanguageSwitcher({ ...restProps }: LanguageSwitcherProps) {
 }
 
 export function LanguageSwitcherItem({ locale }: { locale: string }) {
-  const { Flag, name, native } = getLocaleWithKey(locale)
-
-  const label = useMemo(() => {
-    if (name === native) {
-      return name
-    }
-
-    return `${native} (${name})`
+  const { name, native } = getLocaleWithKey(locale)
+  const { label, tooltip } = useMemo(() => {
+    return { label: `(${native})`, tooltip: `${name} (${native})` }
   }, [name, native])
-
   return (
-    <>
-      <Flag className="language-switcher-flag" />
-      <span className="pl-2">{label}</span>
-    </>
+    <span title={tooltip}>
+      <LabelShort locale={locale} />
+      <span className="mx-2">{label}</span>
+    </span>
   )
 }
+
+export function LabelShort({ locale, ...restProps }: { locale: string; className?: string }) {
+  const { key } = getLocaleWithKey(locale)
+  return <LabelShortText {...restProps}>{key}</LabelShortText>
+}
+
+const LabelShortText = styled.span`
+  font-family: ${(props) => props.theme.font.monospace};
+  text-transform: uppercase !important;
+  color: unset !important;
+`
+
+const Dropdown = styled(DropdownBase)`
+  padding: 0;
+  margin: 0;
+`
+
+const DropdownToggle = styled(DropdownToggleBase)`
+  color: ${(props) => props.theme.bodyColor};
+  padding: 0;
+  margin: 0;
+`
+
+const DropdownMenu = styled(DropdownMenuBase)`
+  background-color: ${(props) => props.theme.bodyBg};
+  box-shadow: 1px 1px 20px 0 #0005;
+  transition: opacity ease-out 0.25s;
+`
diff --git a/packages_rs/nextclade-web/src/components/Layout/Layout.tsx b/packages_rs/nextclade-web/src/components/Layout/Layout.tsx
new file mode 100644
index 000000000..ef0d56bb6
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Layout/Layout.tsx
@@ -0,0 +1,49 @@
+import React, { PropsWithChildren, HTMLProps } from 'react'
+import styled from 'styled-components'
+
+import { NavigationBar } from './NavigationBar'
+import { Footer } from './Footer'
+import { UpdateNotification } from './UpdateNotification'
+
+const Container = styled.div`
+  display: flex;
+  flex-direction: column;
+  height: 100%;
+  width: 100%;
+  padding: 0;
+  margin: 0;
+`
+
+const HeaderWrapper = styled.header`
+  height: 45px;
+`
+
+const MainWrapper = styled.main`
+  flex: auto;
+  overflow: hidden;
+  height: 100%;
+  width: 100%;
+  padding: 0;
+  margin: 0;
+`
+
+const FooterWrapper = styled.div`` // plain wrapper: <Footer /> already renders its own <footer> element
+
+export function Layout({ children }: PropsWithChildren<HTMLProps<HTMLDivElement>>) {
+  return (
+    <Container>
+      <HeaderWrapper>
+        <NavigationBar />
+      </HeaderWrapper>
+
+      <MainWrapper>
+        <UpdateNotification />
+        {children}
+      </MainWrapper>
+
+      <FooterWrapper>
+        <Footer />
+      </FooterWrapper>
+    </Container>
+  )
+}
diff --git a/packages_rs/nextclade-web/src/components/Layout/LayoutMain.tsx b/packages_rs/nextclade-web/src/components/Layout/LayoutMain.tsx
deleted file mode 100644
index 135628c71..000000000
--- a/packages_rs/nextclade-web/src/components/Layout/LayoutMain.tsx
+++ /dev/null
@@ -1,71 +0,0 @@
-import { useRouter } from 'next/router'
-import React, { PropsWithChildren, HTMLProps, useCallback } from 'react'
-import { useRecoilValue } from 'recoil'
-import { hasRanAtom } from 'src/state/results.state'
-import styled from 'styled-components'
-import { Button, Container as ContainerBase } from 'reactstrap'
-import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe'
-import { FaCaretRight } from 'react-icons/fa'
-
-import { NavigationBar } from './NavigationBar'
-import FooterContent from './Footer'
-import { UpdateNotification } from './UpdateNotification'
-
-export const Container = styled(ContainerBase)`
-  max-height: 100vh;
-  max-width: ${(props) => props.theme.xl};
-  margin: 0 auto;
-
-  @media (max-width: 991.98px) {
-    padding-left: 10px;
-    padding-right: 10px;
-  }
-
-  @media (max-width: 767.98px) {
-    padding-left: 5px;
-    padding-right: 5px;
-  }
-`
-
-const Header = styled.header``
-
-const MainContent = styled.main`
-  margin: 0;
-`
-
-const Footer = styled.footer``
-
-const ButtonToResults = styled(Button)`
-  position: absolute;
-  z-index: 1000;
-  width: 140px;
-`
-
-export function LayoutMain({ children }: PropsWithChildren<HTMLProps<HTMLDivElement>>) {
-  const { t } = useTranslation()
-  const router = useRouter()
-  const goToResults = useCallback(() => router.push('/results'), [router])
-  const hasRan = useRecoilValue(hasRanAtom)
-
-  return (
-    <Container>
-      <Header>
-        <NavigationBar />
-      </Header>
-
-      <ButtonToResults hidden={!hasRan} color="secondary" onClick={goToResults}>
-        {t('To Results')}
-        <FaCaretRight />
-      </ButtonToResults>
-
-      <MainContent>
-        <UpdateNotification />
-        {children}
-      </MainContent>
-
-      <Footer>
-        <FooterContent />
-      </Footer>
-    </Container>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Layout/LayoutResults.tsx b/packages_rs/nextclade-web/src/components/Layout/LayoutResults.tsx
deleted file mode 100644
index 9530c5844..000000000
--- a/packages_rs/nextclade-web/src/components/Layout/LayoutResults.tsx
+++ /dev/null
@@ -1,51 +0,0 @@
-import React, { PropsWithChildren, HTMLProps } from 'react'
-
-import styled from 'styled-components'
-
-import { NavigationBar } from './NavigationBar'
-import FooterContent from './Footer'
-import { UpdateNotification } from './UpdateNotification'
-
-export const LayoutContainer = styled.div`
-  max-width: 100vw;
-  max-height: 100vh;
-  margin: 0;
-  width: 100%;
-  height: 100%;
-  display: flex;
-  flex-direction: column;
-  flex-wrap: nowrap;
-`
-
-const Header = styled.header`
-  flex-shrink: 0;
-`
-
-const MainContent = styled.main`
-  flex-grow: 1;
-  flex-basis: 100%;
-  min-height: 2em;
-`
-
-const Footer = styled.footer`
-  flex-shrink: 0;
-`
-
-export function LayoutResults({ children }: PropsWithChildren<HTMLProps<HTMLDivElement>>) {
-  return (
-    <LayoutContainer>
-      <Header>
-        <NavigationBar />
-      </Header>
-
-      <MainContent>
-        <UpdateNotification />
-        {children}
-      </MainContent>
-
-      <Footer>
-        <FooterContent />
-      </Footer>
-    </LayoutContainer>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Layout/NavigationBar.tsx b/packages_rs/nextclade-web/src/components/Layout/NavigationBar.tsx
index 9e54b35eb..89f737e01 100644
--- a/packages_rs/nextclade-web/src/components/Layout/NavigationBar.tsx
+++ b/packages_rs/nextclade-web/src/components/Layout/NavigationBar.tsx
@@ -1,139 +1,184 @@
-import React, { useMemo } from 'react'
-
-import { FaDocker, FaGithub, FaTwitter } from 'react-icons/fa'
-import { IoMdBook } from 'react-icons/io'
+import React, { ReactNode, useMemo } from 'react'
+import { useRouter } from 'next/router'
+import {
+  Nav as NavBase,
+  Navbar as NavbarBase,
+  NavbarBrand as NavbarBrandBase,
+  NavItem as NavItemBase,
+} from 'reactstrap'
+import { useRecoilValue } from 'recoil'
+import { Link } from 'src/components/Link/Link'
+import { FaDocker, FaGithub, FaXTwitter, FaDiscourse } from 'react-icons/fa6'
+import { LinkSmart } from 'src/components/Link/LinkSmart'
+import { hasRanAtom, hasTreeAtom } from 'src/state/results.state'
 import styled from 'styled-components'
-
 import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
-
-import { Link } from 'src/components/Link/Link'
-import { LinkExternal } from 'src/components/Link/LinkExternal'
-
 import BrandLogoBase from 'src/assets/img/nextclade_logo.svg'
-
 import { CitationButton } from 'src/components/Citation/CitationButton'
-import { WhatsNewButton } from './WhatsNewButton'
+import { NextcladeTextLogo } from 'src/components/Layout/NextcladeTextLogo'
 import { LanguageSwitcher } from './LanguageSwitcher'
-import { NavigationLogo } from './NavigationLogo'
-import { SettingsButton } from './SettingsButton'
 
-const BRAND_LOGO_SIZE = 36
-const BRAND_LOGO_MARGIN = 10
+const LOGO_SIZE = 36
 
-export const BrandLogo = styled(BrandLogoBase)`
-  width: ${BRAND_LOGO_SIZE}px;
-  height: ${BRAND_LOGO_SIZE}px;
-  margin-left: ${BRAND_LOGO_MARGIN}px;
-  margin-right: ${BRAND_LOGO_MARGIN}px;
+export const Navbar = styled(NavbarBase)`
+  height: 45px;
+  display: flex;
+  padding: 0 !important;
+  margin: 0 !important;
+  box-shadow: 0 0 8px 0 #0004;
 `
 
-export const NavLinkContainer = styled.div`
-  width: 50px;
-  @media (min-width: 1200px) {
-    width: 100px;
-  }
+export const Nav = styled(NavBase)`
+  display: flex;
+  vertical-align: middle;
+  padding: 0 !important;
+  margin: 0 !important;
 `
 
-export const NavLinkGrey = styled(LinkExternal)`
-  color: inherit;
-  text-decoration: none;
-  cursor: pointer;
-
-  &.active,
-  &:active,
-  &:hover,
-  &:focus,
-  &:focus-within {
-    color: initial;
-    text-decoration: none;
-    cursor: pointer;
+export const NavItem = styled(NavItemBase)`
+  padding: 0 0.5rem;
+  flex-grow: 0;
+  flex-shrink: 0;
+  margin: auto;
+
+  * {
+    vertical-align: middle;
   }
 `
 
-export function DocsLink() {
-  const { t } = useTranslationSafe()
+const NavbarBrand = styled(NavbarBrandBase)`
+  display: flex;
+  flex: 1;
+  padding: 0 !important;
+  margin: 0 !important;
+`
+
+const BrandLogo = styled(BrandLogoBase)`
+  width: ${LOGO_SIZE}px;
+  height: ${LOGO_SIZE}px;
+  padding: 0 !important;
+  margin-left: 0.5rem;
+`
+
+const BrandText = styled(NextcladeTextLogo)`
+  margin-left: 1rem;
+  margin-right: 1rem;
+`
+
+export const NavLinkLocalStyle = styled(LinkSmart)<{ $active: boolean; disabled?: boolean }>`
+  color: ${({ $active, disabled, theme }) => (disabled ? theme.gray500 : $active ? theme.primary : theme.bodyColor)};
+  font-weight: ${({ $active }) => $active && 'bold'};
+  text-decoration: ${({ $active }) => $active && 'underline'};
+  cursor: ${({ disabled }) => (disabled ? 'not-allowed' : 'pointer')};
+`
+
+export interface NavLinkDesc {
+  url?: string
+  content?: ReactNode
+  title?: string
+}
+
+export interface NavLinkLocalProps {
+  desc: NavLinkDesc
+  active?: boolean
+}
 
+export function NavLinkImpl({ desc: { url, content, title }, active = false }: NavLinkLocalProps) {
   return (
-    <NavLinkContainer className="text-center">
-      <NavLinkGrey
-        href="https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextclade-web.html"
-        alt={t('Documentation')}
-        title={t('Documentation')}
-      >
-        <IoMdBook className="mr-xl-2" />
-        <span className="d-none d-xl-inline">{t('Docs')}</span>
-      </NavLinkGrey>
-    </NavLinkContainer>
+    <NavItem key={url} title={title} aria-disabled={!url}>
+      <NavLinkLocalStyle href={url} $active={active} aria-disabled={!url} disabled={!url}>
+        {content}
+      </NavLinkLocalStyle>
+    </NavItem>
   )
 }
 
 export function NavigationBar() {
   const { t } = useTranslationSafe()
+  const { pathname } = useRouter()
+
+  const hasTree = useRecoilValue(hasTreeAtom)
+  const hasRan = useRecoilValue(hasRanAtom)
+
+  const linksLeft = useMemo(() => {
+    return [
+      { url: '/', content: t('Start'), title: t('Show start page') },
+      {
+        url: hasRan ? '/results' : undefined,
+        content: t('Results'),
+        title: hasRan ? t('Show analysis results table') : t('Please run the analysis first'),
+      },
+      {
+        url: hasTree ? '/tree' : undefined,
+        content: t('Tree'),
+        title: hasTree ? t('Show phylogenetic tree') : t('Please run the analysis on a dataset with reference tree'),
+      },
+      {
+        url: '/settings',
+        content: t('Settings'),
+        title: t('Configure Nextclade'),
+      },
+    ].map((desc) => {
+      return <NavLinkImpl key={desc.url ?? desc.title} desc={desc} active={pathname === desc.url} />
+    })
+  }, [hasRan, hasTree, pathname, t])
 
-  const navLinksRight = useMemo(
-    () => [
+  const linksRight = useMemo(() => {
+    return [
+      {
+        title: t('Cite Nextclade in your work'),
+        content: <CitationButton />,
+      },
+      {
+        url: 'https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextclade-web.html',
+        title: t('Nextclade Web documentation'),
+        content: t('Docs'),
+      },
+      {
+        url: 'https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextclade-cli.html',
+        title: t('Nextclade CLI documentation'),
+        content: t('CLI'),
+      },
       {
-        title: t('Twitter'),
         url: 'https://twitter.com/nextstrain',
-        alt: t('Link to our Twitter'),
-        icon: <FaTwitter size={28} color="#aaa" />,
+        title: t('Link to our X.com (Twitter)'),
+        content: <FaXTwitter size={20} color="#aaa" className="mb-1" />,
+      },
+      {
+        url: 'https://discussion.nextstrain.org/',
+        title: t('Link to our discussion forum'),
+        content: <FaDiscourse size={20} color="#aaa" className="mb-1" />,
       },
       {
-        title: t('Our containers at Docker Hub'),
         url: 'https://hub.docker.com/r/nextstrain/nextclade',
-        alt: t('Link to our Docker containers'),
-        icon: <FaDocker size={28} color="#aaa" />,
+        title: t('Link to our Docker containers'),
+        content: <FaDocker size={20} color="#aaa" className="mb-1" />,
       },
       {
-        title: t('GitHub'),
         url: 'https://github.com/nextstrain/nextclade',
-        alt: t('Link to our Github page'),
-        icon: <FaGithub size={28} color="#aaa" />,
+        title: t('Link to our Github page'),
+        content: <FaGithub size={20} color="#aaa" className="mb-1" />,
       },
-    ],
-    [t],
-  )
+      {
+        title: t('Change language'),
+        content: <LanguageSwitcher />,
+      },
+    ].map((desc) => {
+      return <NavLinkImpl key={desc.title} desc={desc} active={pathname === desc.url} />
+    })
+  }, [pathname, t])
 
   return (
-    <nav
-      className="navbar navbar-expand navbar-light navbar-scroll hide-native-scrollbar"
-      role="navigation"
-      data-testid="NavigationBar"
-    >
-      <Link className="navbar-brand d-flex" href="/" role="button">
-        <BrandLogo />
-        <NavigationLogo />
-      </Link>
-
-      <ul className="navbar-nav ml-auto d-flex">
-        <li className="nav-item mx-2 my-auto">
-          <CitationButton />
-        </li>
-
-        <li className="nav-item mx-2 my-auto">
-          <DocsLink />
-        </li>
-
-        <li className="nav-item mx-2 my-auto">
-          <SettingsButton />
-        </li>
-
-        <li className="nav-item mx-2 my-auto">
-          <WhatsNewButton />
-        </li>
-
-        <li className="nav-item mx-2 my-auto">
-          <LanguageSwitcher />
-        </li>
-
-        {navLinksRight.map(({ title, url, alt, icon }) => (
-          <li key={title} className="nav-item mx-2 my-auto">
-            <LinkExternal title={title} url={url} alt={alt}>
-              {icon}
-            </LinkExternal>
-          </li>
-        ))}
-      </ul>
-    </nav>
+    <Navbar light role="navigation">
+      <Nav>
+        <NavbarBrand tag={Link} href="/">
+          <BrandLogo />
+          <BrandText />
+        </NavbarBrand>
+
+        {linksLeft}
+      </Nav>
+      <Nav className="ml-auto">{linksRight}</Nav>
+    </Navbar>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Layout/NavigationLogo.tsx b/packages_rs/nextclade-web/src/components/Layout/NextcladeTextLogo.tsx
similarity index 83%
rename from packages_rs/nextclade-web/src/components/Layout/NavigationLogo.tsx
rename to packages_rs/nextclade-web/src/components/Layout/NextcladeTextLogo.tsx
index ec93789d7..e0ad81bdc 100644
--- a/packages_rs/nextclade-web/src/components/Layout/NavigationLogo.tsx
+++ b/packages_rs/nextclade-web/src/components/Layout/NextcladeTextLogo.tsx
@@ -3,23 +3,24 @@ import styled from 'styled-components'
 
 import { TITLE_COLORS } from 'src/constants'
 
+const Container = styled.div``
+
 // Borrowed with modifications from Nextstrain.org
 // https://github.com/nextstrain/nextstrain.org/blob/master/static-site/src/components/splash/title.jsx
-
 const LetterSpan = styled.span<{ pos: number }>`
   font-size: 20px;
   color: ${(props) => TITLE_COLORS[props.pos]};
 `
 
-export function NavigationLogo() {
+export function NextcladeTextLogo({ ...rest }) {
   return (
-    <div>
+    <Container {...rest}>
       {'Nextclade'.split('').map((letter, i) => (
         // eslint-disable-next-line react/no-array-index-key
         <LetterSpan key={`${i}_${letter}`} pos={i}>
           {letter}
         </LetterSpan>
       ))}
-    </div>
+    </Container>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Link/LinkSmart.tsx b/packages_rs/nextclade-web/src/components/Link/LinkSmart.tsx
new file mode 100644
index 000000000..6216ec3d5
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Link/LinkSmart.tsx
@@ -0,0 +1,25 @@
+import React, { useMemo } from 'react'
+
+import type { StrictOmit } from 'ts-essentials'
+import isAbsoluteUrl from 'is-absolute-url'
+
+import { Link, LinkProps } from './Link'
+import { LinkExternal, LinkExternalProps } from './LinkExternal'
+
+export interface LinkSmartProps extends StrictOmit<LinkProps & LinkExternalProps, 'href' | 'as'> {
+  href?: string
+}
+
+// Picks the link flavour from `href`: plain <span> when absent, LinkExternal for absolute URLs, Link otherwise.
+export function LinkSmart({ href, ...restProps }: LinkSmartProps) {
+  // Hook runs unconditionally (before the early return); an absent href is probed as an empty string
+  const isExternal = useMemo(() => isAbsoluteUrl(href ?? ''), [href])
+
+  // Without a destination there is nothing to navigate to, so render a non-link element
+  if (!href) {
+    return <span {...restProps} />
+  }
+
+  // Absolute URLs go through LinkExternal, everything else through Link
+  return isExternal ? <LinkExternal href={href} {...restProps} /> : <Link href={href} {...restProps} />
+}
diff --git a/packages_rs/nextclade-web/src/components/Loading/Loading.tsx b/packages_rs/nextclade-web/src/components/Loading/Loading.tsx
index 317a092ba..3daa9ee00 100644
--- a/packages_rs/nextclade-web/src/components/Loading/Loading.tsx
+++ b/packages_rs/nextclade-web/src/components/Loading/Loading.tsx
@@ -95,3 +95,5 @@ function Loading() {
 }
 
 export default Loading
+
+export const LOADING = <Loading />
diff --git a/packages_rs/nextclade-web/src/components/Main/DatasetContentSection.tsx b/packages_rs/nextclade-web/src/components/Main/DatasetContentSection.tsx
new file mode 100644
index 000000000..2c1d98107
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Main/DatasetContentSection.tsx
@@ -0,0 +1,142 @@
+import classnames from 'classnames'
+import React, { PropsWithChildren, useCallback, useMemo, useRef, useState } from 'react'
+import styled from 'styled-components'
+import {
+  Nav as NavBase,
+  NavItem as NavItemBase,
+  NavLink as NavLinkBase,
+  TabPane,
+  TabContent as TabContentBase,
+  NavItemProps,
+  TabPaneProps,
+} from 'reactstrap'
+import { useRecoilValue } from 'recoil'
+import { MarkdownRemote } from 'src/components/Common/Markdown'
+import { datasetCurrentAtom } from 'src/state/dataset.state'
+
+// Two-tab panel for the current dataset: README.md and CHANGELOG.md, each rendered through MarkdownRemote.
+export function DatasetContentSection() {
+  const [activeTabId, setActiveTabId] = useState(0)
+  const files = useRecoilValue(datasetCurrentAtom)?.files
+  return (
+    <ContentSection>
+      <Nav tabs>
+        <TabLabel tabId={0} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
+          {'README.md'}
+        </TabLabel>
+        <TabLabel tabId={1} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
+          {'CHANGELOG.md'}
+        </TabLabel>
+      </Nav>
+      <TabContent activeTab={activeTabId}>
+        <TabContentPane tabId={0} activeTabId={activeTabId}>
+          {files?.readme && <MarkdownRemote url={files.readme} />}
+        </TabContentPane>
+        <TabContentPane tabId={1} activeTabId={activeTabId}>
+          {files?.changelog && <MarkdownRemote url={files.changelog} />}
+        </TabContentPane>
+      </TabContent>
+    </ContentSection>
+  )
+}
+
+export interface TabProps extends PropsWithChildren<NavItemProps> {
+  tabId: number
+  activeTabId: number
+  setActiveTabId: (id: number) => void
+}
+
+// Clickable tab header: selects its own `tabId` on click and is marked `active` while that tab is selected.
+export function TabLabel({ tabId, activeTabId, setActiveTabId, children, ...rest }: TabProps) {
+  // The same `active` class goes on both the item and its link, so compute it once
+  const className = classnames({ active: activeTabId === tabId })
+  const activate = useCallback(() => setActiveTabId(tabId), [setActiveTabId, tabId])
+  return (
+    <NavItem className={className} {...rest}>
+      <NavLink tag="nav" className={className} onClick={activate}>{children}</NavLink>
+    </NavItem>
+  )
+}
+
+export interface TabContentPaneProps extends PropsWithChildren<TabPaneProps> {
+  tabId: number
+  activeTabId: number
+}
+
+// Content of one tab: forwards its own `tabId` to TabPane and mounts children lazily via LazyRender.
+export function TabContentPane({ tabId, activeTabId, children, ...rest }: TabContentPaneProps) {
+  return (
+    <LazyRender visible={activeTabId === tabId}>
+      <TabPane tabId={tabId} {...rest}>
+        {children}
+      </TabPane>
+    </LazyRender>
+  )
+}
+
+export interface LazyProps {
+  visible: boolean
+}
+
+// Renders nothing until first made visible; afterwards keeps children mounted and only toggles CSS `display`.
+export function LazyRender({ visible, children }: PropsWithChildren<LazyProps>) {
+  const everShown = useRef(visible)
+  const style = useMemo(() => ({ display: visible ? 'block' : 'none' }), [visible])
+  // One-way latch: once set, it is never reset
+  if (visible) everShown.current = true
+  // Not shown so far: skip rendering the children entirely
+  return everShown.current ? <div style={style}>{children}</div> : null
+}
+
+const ContentSection = styled.div`
+  max-width: 100%;
+`
+
+const Nav = styled(NavBase)`
+  border-bottom: 0 !important;
+`
+
+const NavItem = styled(NavItemBase)`
+  cursor: pointer;
+
+  border: #ccc 1px solid;
+  border-top-left-radius: 5px;
+  border-top-right-radius: 5px;
+
+  background: #ddd;
+  min-width: 170px;
+
+  border-bottom: 0 !important;
+
+  .active {
+    font-weight: bold;
+    background-color: ${(props) => props.theme.white} !important;
+  }
+
+  .nav-link,
+  .nav-link:focus {
+    box-shadow: none !important;
+
+    &:hover {
+      box-shadow: none !important;
+    }
+  }
+
+  .nav-link.active,
+  .nav-link.active:focus {
+    box-shadow: none !important;
+  }
+
+  .nav-link.disabled {
+    box-shadow: none !important;
+  }
+`
+
+const NavLink = styled(NavLinkBase)`
+  color: ${(props) => props.theme.bodyColor};
+`
+
+const TabContent = styled(TabContentBase)`
+  border: #ddd 1px solid;
+  margin-top: -1px;
+`
diff --git a/packages_rs/nextclade-web/src/components/Main/DatasetCurrent.tsx b/packages_rs/nextclade-web/src/components/Main/DatasetCurrent.tsx
index af0807201..0c4d5167e 100644
--- a/packages_rs/nextclade-web/src/components/Main/DatasetCurrent.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/DatasetCurrent.tsx
@@ -1,5 +1,5 @@
 import { isNil } from 'lodash'
-import React, { useCallback, useState } from 'react'
+import React, { useCallback, useMemo, useState } from 'react'
 import { Button, Col, Collapse, Row, UncontrolledAlert } from 'reactstrap'
 import { useRecoilState, useRecoilValue, useResetRecoilState, useSetRecoilState } from 'recoil'
 import styled from 'styled-components'
@@ -82,6 +82,28 @@ export function DatasetCurrent() {
 
   const onCustomizeClicked = useCallback(() => setAdvancedOpen((advancedOpen) => !advancedOpen), [])
 
+  const customize = useMemo(() => {
+    if (datasetCurrent?.path === 'autodetect') {
+      return null
+    }
+
+    return (
+      <Row noGutters>
+        <Col>
+          <ButtonCustomize isOpen={advancedOpen} onClick={onCustomizeClicked} />
+
+          <Collapse isOpen={advancedOpen}>
+            <AdvancedModeExplanationWrapper>
+              <AdvancedModeExplanationContent />
+            </AdvancedModeExplanationWrapper>
+
+            <FilePickerAdvanced />
+          </Collapse>
+        </Col>
+      </Row>
+    )
+  }, [advancedOpen, datasetCurrent?.path, onCustomizeClicked])
+
   if (!datasetCurrent) {
     return null
   }
@@ -105,29 +127,11 @@ export function DatasetCurrent() {
               <ChangeButton type="button" color="secondary" onClick={onChangeClicked}>
                 {t('Change')}
               </ChangeButton>
-              <LinkExternal
-                className="ml-auto mt-auto"
-                href="https://github.com/nextstrain/nextclade_data/blob/release/CHANGELOG.md"
-              >
-                <small>{t('Recent dataset updates')}</small>
-              </LinkExternal>
             </Right>
           </Col>
         </Row>
 
-        <Row noGutters>
-          <Col>
-            <ButtonCustomize isOpen={advancedOpen} onClick={onCustomizeClicked} />
-
-            <Collapse isOpen={advancedOpen}>
-              <AdvancedModeExplanationWrapper>
-                <AdvancedModeExplanationContent />
-              </AdvancedModeExplanationWrapper>
-
-              <FilePickerAdvanced />
-            </Collapse>
-          </Col>
-        </Row>
+        {customize}
       </CurrentDatasetInfoBody>
     </CurrentDatasetInfoContainer>
   )
diff --git a/packages_rs/nextclade-web/src/components/Main/DatasetInfo.tsx b/packages_rs/nextclade-web/src/components/Main/DatasetInfo.tsx
index 65bc404d3..db8e79afc 100644
--- a/packages_rs/nextclade-web/src/components/Main/DatasetInfo.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/DatasetInfo.tsx
@@ -1,24 +1,74 @@
+import { isNil } from 'lodash'
+import { darken } from 'polished'
 import React, { useMemo } from 'react'
-
-import styled from 'styled-components'
-
-import type { Dataset } from 'src/types'
+import { Badge } from 'reactstrap'
+import { useRecoilValue } from 'recoil'
+import { colorHash } from 'src/helpers/colorHash'
 import { formatDateIsoUtcSimple } from 'src/helpers/formatDate'
+import { firstLetter } from 'src/helpers/string'
 import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import {
+  autodetectResultsByDatasetAtom,
+  DATASET_ID_UNDETECTED,
+  numberAutodetectResultsAtom,
+} from 'src/state/autodetect.state'
+import type { Dataset } from 'src/types'
+import styled from 'styled-components'
+
+export const Container = styled.div`
+  display: flex;
+  //border: 1px #ccc9 solid;
+  //border-radius: 5px;
 
-export const DatasetinfoContainer = styled.div``
+  //margin-top: 3px !important;
+  //margin-bottom: 3px !important;
+  //margin-left: 5px;
+  //padding: 15px;
+
+  margin: 0;
+  padding: 15px;
+  box-shadow: 0 0 12px 0 #0002;
+  border: 1px #ccc9 solid;
+  border-radius: 5px;
+`
+
+export const FlexLeft = styled.div`
+  flex: 0;
+  display: flex;
+  flex-direction: column;
+  margin: auto 0;
+`
+
+export const FlexRight = styled.div`
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  margin-left: 1rem;
+`
 
-export const DatasetName = styled.h6`
-  font-size: 1.3rem;
+export const DatasetName = styled.h4`
+  display: flex;
   font-weight: bold;
-  padding: 0;
   margin: 0;
+  padding: 0;
+  height: 100%;
 `
 
 export const DatasetInfoLine = styled.p`
-  font-size: 0.8rem;
+  font-size: 0.9rem;
   padding: 0;
   margin: 0;
+
+  &:after {
+    content: ' ';
+    white-space: pre;
+  }
+`
+
+const DatasetInfoBadge = styled(Badge)`
+  font-size: 0.8rem;
+  margin-top: 2px !important;
+  padding: 0.25rem 0.5rem;
 `
 
 export interface DatasetInfoProps {
@@ -27,21 +77,183 @@ export interface DatasetInfoProps {
 
 export function DatasetInfo({ dataset }: DatasetInfoProps) {
   const { t } = useTranslationSafe()
-  const tagFormatted = useMemo(() => dataset && formatDateIsoUtcSimple(dataset.attributes.tag.value), [dataset])
+  const { attributes, official, deprecated, enabled, experimental, path, version } = dataset
+  const { name, reference } = attributes
 
-  const { name, reference } = dataset.attributes
+  const updatedAt = useMemo(() => {
+    // Formatted update date of the dataset version; 'unknown' when the version has no `updatedAt`.
+    // Versions tagged 'unreleased' get an explicit suffix.
+    const formatted = version?.updatedAt ? formatDateIsoUtcSimple(version.updatedAt) : 'unknown'
+    const unreleased = version?.tag === 'unreleased'
+    return unreleased ? `${formatted} (unreleased)` : formatted
+  }, [version?.tag, version?.updatedAt])
+
+  if (!enabled) {
+    return null
+  }
+
+  if (path === DATASET_ID_UNDETECTED) {
+    return <DatasetUndetectedInfo />
+  }
 
   return (
-    <DatasetinfoContainer>
-      <DatasetName>{name.valueFriendly ?? name.value}</DatasetName>
-      <DatasetInfoLine>
-        {t('Reference: {{ name }} ({{ accession }})', {
-          name: reference.valueFriendly ?? 'Untitled',
-          accession: reference.value,
-        })}
-      </DatasetInfoLine>
-      <DatasetInfoLine>{t('Updated: {{updated}}', { updated: tagFormatted })}</DatasetInfoLine>
-      <DatasetInfoLine>{t('Dataset name: {{name}}', { name: name.value })}</DatasetInfoLine>
-    </DatasetinfoContainer>
+    <Container>
+      <FlexLeft>
+        <DatasetInfoAutodetectProgressCircle dataset={dataset} />
+      </FlexLeft>
+
+      <FlexRight>
+        <DatasetName>
+          <span>{name.valueFriendly ?? name.value ?? path}</span>
+
+          <span className="d-flex ml-auto">
+            {official ? (
+              <DatasetInfoBadge
+                className="ml-2 my-auto"
+                color="success"
+                title="This dataset is provided by Nextclade team."
+              >
+                {t('official')}
+              </DatasetInfoBadge>
+            ) : (
+              <DatasetInfoBadge
+                className="ml-2 my-auto"
+                color="info"
+                title="This dataset is provided by the community members. Nextclade team cannot verify correctness of community datasets or provide support for them. Use at own risk. Please contact dataset authors for all questions."
+              >
+                {t('community')}
+              </DatasetInfoBadge>
+            )}
+
+            {experimental && (
+              <DatasetInfoBadge
+                className="ml-2 my-auto"
+                color="warning"
+                title="Dataset authors marked this dataset as experimental, which means the dataset is still under development, is of lower quality than usual or has other issues. Use at own risk. Please contact dataset authors for specifics."
+              >
+                {t('experimental')}
+              </DatasetInfoBadge>
+            )}
+
+            {deprecated && (
+              <DatasetInfoBadge
+                className="ml-2 my-auto"
+                color="secondary"
+                title="Dataset authors marked this dataset as deprecated, which means the dataset is obsolete, will no longer be updated or is not relevant otherwise. Please contact dataset authors for specifics."
+              >
+                {t('deprecated')}
+              </DatasetInfoBadge>
+            )}
+          </span>
+        </DatasetName>
+
+        <DatasetInfoLine>
+          {t('Reference: {{ name }} ({{ accession }})', {
+            name: reference.valueFriendly ?? 'Untitled',
+            accession: reference.value,
+          })}
+        </DatasetInfoLine>
+        <DatasetInfoLine>{t('Updated at: {{updated}}', { updated: updatedAt })}</DatasetInfoLine>
+        <DatasetInfoLine>{t('Dataset name: {{name}}', { name: path })}</DatasetInfoLine>
+      </FlexRight>
+    </Container>
   )
 }
+
+export function DatasetUndetectedInfo() {
+  const { t } = useTranslationSafe()
+
+  return (
+    <Container>
+      <DatasetName>
+        <span>{t('Autodetect')}</span>
+      </DatasetName>
+      <DatasetInfoLine>{t('Detect pathogen automatically from sequences')}</DatasetInfoLine>
+      <DatasetInfoLine />
+      <DatasetInfoLine />
+    </Container>
+  )
+}
+
+export interface DatasetInfoCircleProps {
+  dataset: Dataset
+}
+
+// Round badge: dataset initial while records are nil, else `records.length` as a percentage of the total count.
+function DatasetInfoAutodetectProgressCircle({ dataset }: DatasetInfoCircleProps) {
+  const { path } = dataset
+  const { name } = dataset.attributes
+  const circleBg = useMemo(() => darken(0.1)(colorHash(path, { saturation: 0.5, reverse: true })), [path])
+  const records = useRecoilValue(autodetectResultsByDatasetAtom(path))
+  const total = useRecoilValue(numberAutodetectResultsAtom)
+
+  const { circleText, countText, percentage } = useMemo(() => {
+    // Nil records: show the upper-cased first letter of the dataset name and a blank (nbsp) counter
+    if (isNil(records)) {
+      const initial = firstLetter(name.valueFriendly ?? name.value) ?? ' '
+      return { circleText: initial.toUpperCase(), percentage: 0, countText: '\u00A0' }
+    }
+    // Empty records: fixed zero values, no division performed
+    if (records.length === 0) {
+      return { circleText: `0%`, percentage: 0, countText: `0 / ${total}` }
+    }
+    const fraction = records.length / total
+    return {
+      circleText: `${(100 * fraction).toFixed(0)}%`,
+      percentage: fraction,
+      countText: `${records.length} / ${total}`,
+    }
+  }, [records, name.value, name.valueFriendly, total])
+
+  return (
+    <>
+      <CircleBorder $percentage={percentage}>
+        <Circle $bg={circleBg}>{circleText}</Circle>
+      </CircleBorder>
+
+      <CountText>{countText}</CountText>
+    </>
+  )
+}
+
+const CountText = styled.span`
+  text-align: center;
+  font-size: 0.8rem;
+`
+
+interface CircleBorderProps {
+  $percentage: number
+  $fg?: string
+  $bg?: string
+}
+
+const CircleBorder = styled.div.attrs<CircleBorderProps>((props) => ({
+  style: {
+    background: `
+      radial-gradient(closest-side, white 79%, transparent 80% 100%),
+      conic-gradient(
+        ${props.$fg ?? props.theme.success} calc(${props.$percentage} * 100%),
+        ${props.$bg ?? 'lightgray'} 0
+      )`,
+  },
+}))<CircleBorderProps>`
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  border-radius: 50%;
+  width: 75px;
+  height: 75px;
+`
+
+const Circle = styled.div<{ $bg?: string; $fg?: string }>`
+  display: flex;
+  margin: auto;
+  justify-content: center;
+  align-items: center;
+  border-radius: 50%;
+  background: ${(props) => props.$bg ?? props.theme.gray700};
+  color: ${(props) => props.$fg ?? props.theme.gray100};
+  width: 60px;
+  height: 60px;
+  font-size: 1.2rem;
+`
diff --git a/packages_rs/nextclade-web/src/components/Main/DatasetSelector.tsx b/packages_rs/nextclade-web/src/components/Main/DatasetSelector.tsx
index 9bae9b33c..ca7f290b4 100644
--- a/packages_rs/nextclade-web/src/components/Main/DatasetSelector.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/DatasetSelector.tsx
@@ -1,32 +1,87 @@
-import React, { HTMLProps, useCallback, useState } from 'react'
-import classNames from 'classnames'
-import { ThreeDots } from 'react-loader-spinner'
-import { Button, Col, Container, Input, Row } from 'reactstrap'
+import React, { HTMLProps, useState } from 'react'
 import { useRecoilState, useRecoilValue } from 'recoil'
 import styled from 'styled-components'
-import type { Dataset } from 'src/types'
-import { datasetCurrentAtom, datasetsAtom } from 'src/state/dataset.state'
+import { ThreeDots } from 'react-loader-spinner'
+import { SuggestionPanel } from 'src/components/Main/SuggestionPanel'
 import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
-import { LinkExternal } from 'src/components/Link/LinkExternal'
-import { DatasetSelectorList } from './DatasetSelectorList'
+import { datasetCurrentAtom, datasetsAtom } from 'src/state/dataset.state'
+import { SearchBox } from 'src/components/Common/SearchBox'
+import { DatasetSelectorList } from 'src/components/Main/DatasetSelectorList'
 
-const DatasetSelectorContainer = styled(Container)`
+/** Dataset selection panel: title and search box, then the dataset list (or a spinner), then SuggestionPanel. */
+export function DatasetSelector() {
+  const { t } = useTranslationSafe()
+  const [searchTerm, setSearchTerm] = useState('')
+  const { datasets } = useRecoilValue(datasetsAtom)
+  const [datasetCurrent, setDatasetCurrent] = useRecoilState(datasetCurrentAtom)
+
+  // While the dataset list is empty, a spinner is rendered in place of the list
+  const hasNoDatasets = datasets.length === 0
+
+  return (
+    <Container>
+      <Header>
+        <Title>{t('Select dataset')}</Title>
+
+        <SearchBox searchTitle={t('Search datasets')} searchTerm={searchTerm} onSearchTermChange={setSearchTerm} />
+      </Header>
+
+      <Main>
+        {hasNoDatasets ? (
+          <SpinnerWrapper>
+            <SpinnerWrapperInternal>
+              <Spinner color="#aaa" width={20} height={20} />
+            </SpinnerWrapperInternal>
+          </SpinnerWrapper>
+        ) : (
+          <DatasetSelectorList
+            datasets={datasets}
+            datasetHighlighted={datasetCurrent}
+            searchTerm={searchTerm}
+            onDatasetHighlighted={setDatasetCurrent}
+          />
+        )}
+      </Main>
+
+      <Footer>
+        <SuggestionPanel />
+      </Footer>
+    </Container>
+  )
+}
+
+const Container = styled.div`
   display: flex;
+  flex: 1;
   flex-direction: column;
-  width: 100%;
   height: 100%;
-  padding: 0;
+  overflow: hidden;
+  margin-right: 10px;
+`
+
+const Header = styled.div`
+  display: flex;
+  flex: 0;
+  padding-left: 10px;
+  margin-top: 10px;
+  margin-bottom: 3px;
 `
 
-const DatasetSelectorTitle = styled.h4`
+const Main = styled.div`
+  display: flex;
   flex: 1;
-  margin: auto 0;
+  flex-direction: column;
+  overflow: hidden;
 `
 
-const DatasetSelectorListContainer = styled.section`
+const Footer = styled.div`
   display: flex;
-  width: 100%;
-  height: 300px;
+  flex: 0;
+`
+
+const Title = styled.h4`
+  flex: 1;
+  margin: auto 0;
 `
 
 const SpinnerWrapper = styled.div<HTMLProps<HTMLDivElement>>`
@@ -44,104 +99,3 @@ const Spinner = styled(ThreeDots)`
   margin: auto;
   height: 100%;
 `
-
-export interface DatasetSelectorProps {
-  searchTerm: string
-
-  setSearchTerm(searchTerm: string): void
-}
-
-export function DatasetSelector({ searchTerm, setSearchTerm }: DatasetSelectorProps) {
-  const { t } = useTranslationSafe()
-  const [error, setError] = useState<string | undefined>()
-  const { datasets } = useRecoilValue(datasetsAtom)
-  const [datasetCurrent, setDatasetCurrent] = useRecoilState(datasetCurrentAtom)
-  const [datasetHighlighted, setDatasetHighlighted] = useState<Dataset | undefined>(datasetCurrent)
-
-  const onSearchTermChange = useCallback(
-    (event: React.ChangeEvent<HTMLInputElement>) => {
-      const { value } = event.target
-      setSearchTerm(value)
-    },
-    [setSearchTerm],
-  )
-
-  const onNextClicked = useCallback(() => {
-    if (datasetHighlighted) {
-      setDatasetCurrent(datasetHighlighted)
-      setError(undefined)
-    } else {
-      setError(t('Please select a pathogen first'))
-    }
-  }, [datasetHighlighted, setDatasetCurrent, t])
-
-  const isBusy = datasets.length === 0
-
-  return (
-    <DatasetSelectorContainer fluid>
-      <Row noGutters>
-        <Col sm={6} className="d-flex">
-          <DatasetSelectorTitle>{t('Select a pathogen')}</DatasetSelectorTitle>
-        </Col>
-
-        <Col sm={6}>
-          <Input
-            type="text"
-            title="Search pathogens"
-            placeholder="Search pathogens"
-            autoComplete="off"
-            autoCorrect="off"
-            autoCapitalize="off"
-            spellCheck="false"
-            data-gramm="false"
-            value={searchTerm}
-            onChange={onSearchTermChange}
-          />
-        </Col>
-      </Row>
-
-      <Row noGutters className="mt-2">
-        <DatasetSelectorListContainer>
-          {!isBusy && (
-            <DatasetSelectorList
-              datasets={datasets}
-              datasetHighlighted={datasetHighlighted}
-              searchTerm={searchTerm}
-              onDatasetHighlighted={setDatasetHighlighted}
-            />
-          )}
-
-          {isBusy && (
-            <SpinnerWrapper>
-              <SpinnerWrapperInternal>
-                <Spinner color="#aaa" width={20} height={20} />
-              </SpinnerWrapperInternal>
-            </SpinnerWrapper>
-          )}
-        </DatasetSelectorListContainer>
-      </Row>
-
-      <Row noGutters>
-        <Col className="py-1">
-          <LinkExternal href="https://github.com/nextstrain/nextclade_data/blob/release/CHANGELOG.md">
-            <small>{t('Recent dataset updates')}</small>
-          </LinkExternal>
-        </Col>
-      </Row>
-
-      <Row noGutters className="mt-2">
-        <Col className="d-flex">
-          {error && <p className="m-0 p-0 flex-1 text-danger">{error}</p>}
-          <Button
-            className={classNames('ml-auto', !datasetHighlighted && 'disabled')}
-            type="button"
-            color={datasetHighlighted ? 'primary' : 'secondary'}
-            onClick={onNextClicked}
-          >
-            {t('Next')}
-          </Button>
-        </Col>
-      </Row>
-    </DatasetSelectorContainer>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/DatasetSelectorList.tsx b/packages_rs/nextclade-web/src/components/Main/DatasetSelectorList.tsx
index aca858f44..2bb5be734 100644
--- a/packages_rs/nextclade-web/src/components/Main/DatasetSelectorList.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/DatasetSelectorList.tsx
@@ -1,62 +1,20 @@
-import React, { useCallback, useMemo } from 'react'
-
-import { ListGroup, ListGroupItem } from 'reactstrap'
-import styled from 'styled-components'
-
+import { get, isNil, sortBy } from 'lodash'
+import { lighten } from 'polished'
+import React, { forwardRef, useCallback, useEffect, useMemo, useRef } from 'react'
+import { ListGroup } from 'reactstrap'
+import { useRecoilState, useRecoilValue } from 'recoil'
+import { ListGenericCss } from 'src/components/Common/List'
+import { DatasetInfo } from 'src/components/Main/DatasetInfo'
+import { search } from 'src/helpers/search'
+import {
+  autodetectResultsAtom,
+  AutodetectRunState,
+  autodetectRunStateAtom,
+  groupByDatasets,
+} from 'src/state/autodetect.state'
 import type { Dataset } from 'src/types'
 import { areDatasetsEqual } from 'src/types'
-import { search } from 'src/helpers/search'
-import { DatasetInfo } from 'src/components/Main/DatasetInfo'
-
-export const DatasetSelectorContainer = styled.div`
-  flex: 1 0 100%;
-  display: flex;
-  flex-direction: column;
-  overflow: hidden;
-  height: 100%;
-  border: 1px #ccc solid;
-  border-radius: 5px;
-`
-
-export const DatasetSelectorUl = styled(ListGroup)`
-  flex: 1;
-  overflow-y: scroll;
-
-  // prettier-ignore
-  background:
-    linear-gradient(#eaeaea 25%, rgba(255,255,255, 0)),
-    linear-gradient(rgba(255,255,255, 0), #eaeaea 90%) 0 100%,
-    radial-gradient(farthest-side at 50% 0, rgba(100,100,100, 0.25), rgba(0,0,0,0)),
-    radial-gradient(farthest-side at 50% 100%, rgba(100,100,100, 0.25), rgba(0,0,0,0)) 0 100%;
-  background-color: transparent;
-  background-repeat: no-repeat;
-  background-attachment: local, local, scroll, scroll;
-  background-size: 100% 70px, 100% 70px, 100% 30px, 100% 30px;
-`
-
-export const DatasetSelectorLi = styled(ListGroupItem)<{ $isDimmed?: boolean }>`
-  list-style: none;
-  margin: 0;
-  padding: 0.5rem;
-  cursor: pointer;
-  opacity: ${(props) => props.$isDimmed && 0.33};
-  background-color: transparent;
-`
-
-export interface DatasetSelectorListItemProps {
-  dataset: Dataset
-  isCurrent?: boolean
-  isDimmed?: boolean
-  onClick?: () => void
-}
-
-export function DatasetSelectorListItem({ dataset, isCurrent, isDimmed, onClick }: DatasetSelectorListItemProps) {
-  return (
-    <DatasetSelectorLi $isDimmed={isDimmed} aria-current={isCurrent} active={isCurrent} onClick={onClick}>
-      <DatasetInfo dataset={dataset} />
-    </DatasetSelectorLi>
-  )
-}
+import styled from 'styled-components'
 
 export interface DatasetSelectorListProps {
   datasets: Dataset[]
@@ -66,12 +24,6 @@ export interface DatasetSelectorListProps {
   onDatasetHighlighted(dataset?: Dataset): void
 }
 
-function generateDatasetId(dataset: Dataset) {
-  return Object.entries(dataset.attributes)
-    .map(([key, attr]) => `${key}=${attr.value}`)
-    .join(';')
-}
-
 export function DatasetSelectorList({
   datasets,
   searchTerm,
@@ -80,25 +32,76 @@ export function DatasetSelectorList({
 }: DatasetSelectorListProps) {
   const onItemClick = useCallback((dataset: Dataset) => () => onDatasetHighlighted(dataset), [onDatasetHighlighted])
 
-  const { itemsStartWith, itemsInclude, itemsNotInclude } = useMemo(() => {
+  const autodetectResults = useRecoilValue(autodetectResultsAtom)
+  const [autodetectRunState, setAutodetectRunState] = useRecoilState(autodetectRunStateAtom)
+
+  const autodetectResult = useMemo(() => {
+    if (isNil(autodetectResults) || autodetectResults.length === 0) {
+      // No autodetect results: keep all datasets in `itemsInclude`, in their given order
+      return { itemsStartWith: [], itemsInclude: datasets, itemsNotInclude: [] }
+    }
+
+    const recordsByDataset = groupByDatasets(autodetectResults)
+    // Path lookup is built once, instead of re-scanning all groups for every dataset
+    const matchedPaths = new Set(Object.keys(recordsByDataset))
+
+    // Matched datasets come first, ordered by descending number of records
+    const matched = datasets.filter((candidate) => matchedPaths.has(candidate.path))
+    const itemsInclude = sortBy(matched, (dataset) => -get(recordsByDataset, dataset.path, []).length)
+    const itemsNotInclude = datasets.filter((candidate) => !matchedPaths.has(candidate.path))
+
+    return { itemsStartWith: [], itemsInclude, itemsNotInclude }
+  }, [autodetectResults, datasets])
+
+  const searchResult = useMemo(() => {
     if (searchTerm.trim().length === 0) {
-      return { itemsStartWith: datasets, itemsInclude: [], itemsNotInclude: [] }
+      return autodetectResult
     }
 
-    return search(datasets, searchTerm, (dataset) => [
-      dataset.attributes.name.value,
-      dataset.attributes.name.valueFriendly ?? '',
-      dataset.attributes.reference.value,
-    ])
-  }, [datasets, searchTerm])
+    return search(
+      [...autodetectResult.itemsStartWith, ...autodetectResult.itemsInclude, ...autodetectResult.itemsNotInclude],
+      searchTerm,
+      (dataset) => [
+        dataset.attributes.name.value,
+        dataset.attributes.name.valueFriendly ?? '',
+        dataset.attributes.reference.value,
+      ],
+    )
+  }, [autodetectResult, searchTerm])
+
+  const { itemsStartWith, itemsInclude, itemsNotInclude } = searchResult
+
+  const itemsRef = useRef<Map<string, HTMLLIElement>>(new Map())
+
+  // Scrolling is a DOM side effect: do it in an effect (after commit, when item refs are
+  // registered) instead of during render. Re-runs when the highlight or the item lists change.
+  useEffect(() => {
+    if (!datasetHighlighted) {
+      return
+    }
+    itemsRef.current.get(datasetHighlighted.path)?.scrollIntoView({
+      behavior: 'smooth',
+      block: 'nearest',
+      inline: 'center',
+    })
+  }, [datasetHighlighted, searchResult])
+
+  useEffect(() => {
+    if (autodetectRunState !== AutodetectRunState.Done) {
+      return // nothing to do unless the run state is Done
+    }
+    onDatasetHighlighted(autodetectResult.itemsInclude[0]) // first included item is the top suggestion
+    setAutodetectRunState(AutodetectRunState.Idle) // reset, so the selection is not re-applied on later renders
+  }, [autodetectRunState, autodetectResult.itemsInclude, onDatasetHighlighted, setAutodetectRunState])
 
-  return (
-    <DatasetSelectorContainer>
-      <DatasetSelectorUl>
+  const listItems = useMemo(() => {
+    return (
+      <>
         {[itemsStartWith, itemsInclude].map((datasets) =>
           datasets.map((dataset) => (
             <DatasetSelectorListItem
-              key={generateDatasetId(dataset)}
+              key={dataset.path}
+              ref={nodeRefSetOrDelete(itemsRef.current, dataset.path)}
               dataset={dataset}
               onClick={onItemClick(dataset)}
               isCurrent={areDatasetsEqual(dataset, datasetHighlighted)}
@@ -109,7 +112,8 @@ export function DatasetSelectorList({
         {[itemsNotInclude].map((datasets) =>
           datasets.map((dataset) => (
             <DatasetSelectorListItem
-              key={generateDatasetId(dataset)}
+              key={dataset.path}
+              ref={nodeRefSetOrDelete(itemsRef.current, dataset.path)}
               dataset={dataset}
               onClick={onItemClick(dataset)}
               isCurrent={areDatasetsEqual(dataset, datasetHighlighted)}
@@ -117,7 +121,62 @@ export function DatasetSelectorList({
             />
           )),
         )}
-      </DatasetSelectorUl>
-    </DatasetSelectorContainer>
-  )
+      </>
+    )
+  }, [datasetHighlighted, itemsInclude, itemsNotInclude, itemsStartWith, onItemClick])
+
+  return <Ul>{listItems}</Ul>
+}
+
+function nodeRefSetOrDelete<T extends HTMLElement>(map: Map<string, T>, key: string) {
+  return function nodeRefSetOrDeleteImpl(node: T | null) {
+    if (node) {
+      map.set(key, node) // element is mounted: register it under `key`
+    } else {
+      map.delete(key) // React calls ref callbacks with `null` on unmount: drop the stale entry
+    }
+  }
+}
+
+export const Ul = styled(ListGroup)`
+  ${ListGenericCss};
+  flex: 1;
+  overflow: auto;
+  padding: 5px 5px;
+  border-radius: 0 !important;
+`
+
+// List item: `$active` adds highlight styles; `$isDimmed` lowers opacity (0.4, or 0.66 when also active).
+export const Li = styled.li<{ $active?: boolean; $isDimmed?: boolean }>`
+  cursor: pointer;
+  opacity: ${(props) => props.$isDimmed && 0.4};
+  background-color: transparent;
+  margin: 3px 3px !important;
+  padding: 0 !important;
+  border-radius: 5px !important;
+
+  ${(props) =>
+    props.$active &&
+    `
+    background-color: ${lighten(0.033)(props.theme.primary)};
+    box-shadow: -3px 3px 12px 3px #0005;
+    ${props.$isDimmed ? 'opacity: 0.66;' : ''}
+   `};
+`
+
+interface DatasetSelectorListItemProps {
+  dataset: Dataset
+  isCurrent?: boolean
+  isDimmed?: boolean
+  onClick?: () => void
 }
+
+const DatasetSelectorListItem = forwardRef<HTMLLIElement, DatasetSelectorListItemProps>(
+  function DatasetSelectorListItemWithRef({ dataset, isCurrent, isDimmed, onClick }, ref) {
+    return (
+      <Li ref={ref} $isDimmed={isDimmed} aria-current={isCurrent} $active={isCurrent} onClick={onClick}>
+        <DatasetInfo dataset={dataset} />
+      </Li>
+    )
+  },
+)
diff --git a/packages_rs/nextclade-web/src/components/Main/Downloads.tsx b/packages_rs/nextclade-web/src/components/Main/Downloads.tsx
deleted file mode 100644
index d0c7a1597..000000000
--- a/packages_rs/nextclade-web/src/components/Main/Downloads.tsx
+++ /dev/null
@@ -1,168 +0,0 @@
-import React, { HTMLProps, ReactNode } from 'react'
-
-import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe'
-import { FaBook, FaDocker, FaDownload, FaGithub, FaGlobeAmericas } from 'react-icons/fa'
-import {
-  Card as ReactstrapCard,
-  CardBody as ReactstrapCardBody,
-  CardHeader as ReactstrapCardHeader,
-  Col,
-  Row,
-} from 'reactstrap'
-import styled from 'styled-components'
-
-import { LinkExternal as LinkExternalBase } from 'src/components/Link/LinkExternal'
-
-const DownloadLinkList = styled.ul`
-  display: flex;
-  flex-direction: column;
-  list-style: none;
-  padding: 0;
-`
-
-const DownloadLinkListItem = styled.li`
-  display: flex;
-  flex: 1;
-  margin: auto;
-`
-
-const LinkExternal = styled(LinkExternalBase)`
-  width: 200px;
-  height: 55px;
-  margin: 0.25rem;
-  padding: 1rem;
-`
-
-const Card = styled(ReactstrapCard)`
-  margin: 5px;
-  height: 100%;
-`
-
-const CardBody = styled(ReactstrapCardBody)`
-  padding: 0.5rem;
-`
-
-const CardHeader = styled(ReactstrapCardHeader)`
-  padding: 1rem;
-`
-
-const iconDownload = <FaDownload color="#653F12" size={20} />
-const iconGithub = <FaGithub color="444" size={20} />
-const iconDocker = <FaDocker color="#369cec" size={20} />
-const iconBook = <FaBook color="#777777" size={20} />
-const iconGlobe = <FaGlobeAmericas color="#5862dc" size={20} />
-
-export interface DownloadLinkProps extends HTMLProps<HTMLAnchorElement> {
-  Icon: ReactNode
-  text: string
-  url: string
-}
-
-export function DownloadLink({ Icon, text, url }: DownloadLinkProps) {
-  return (
-    <DownloadLinkListItem>
-      <LinkExternal href={url} className="btn btn-secondary d-flex" role="button">
-        <span className="my-auto">{Icon}</span>
-        <span className="my-auto ml-2">{text}</span>
-      </LinkExternal>
-    </DownloadLinkListItem>
-  )
-}
-
-export function Downloads() {
-  const { t } = useTranslation()
-
-  return (
-    <Row noGutters className="mt-5">
-      <Col>
-        <Row noGutters>
-          <Col>
-            <h3 className="text-center mx-2">{t('For more advanced use-cases:')}</h3>
-          </Col>
-        </Row>
-
-        <Row noGutters>
-          <Col lg={4}>
-            <Card>
-              <CardHeader>
-                <h4 className="text-center">{'Nextclade CLI'}</h4>
-                <p className="text-center">{t('faster, more configurable command-line version of this application')}</p>
-              </CardHeader>
-
-              <CardBody>
-                <DownloadLinkList>
-                  <DownloadLink
-                    Icon={iconDownload}
-                    text={t('Downloads')}
-                    url="https://github.com/nextstrain/nextclade/releases"
-                  />
-                  <DownloadLink
-                    Icon={iconDocker}
-                    text={t('Docker')}
-                    url="https://hub.docker.com/r/nextstrain/nextclade"
-                  />
-                  <DownloadLink
-                    Icon={iconBook}
-                    text={t('Documentation')}
-                    url="https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextclade-cli.html"
-                  />
-                </DownloadLinkList>
-              </CardBody>
-            </Card>
-          </Col>
-
-          <Col lg={4}>
-            <Card>
-              <CardHeader>
-                <h4 className="text-center">{'Nextalign CLI'}</h4>
-                <p className="text-center">
-                  {t('pairwise reference alignment and translation tool used by Nextclade')}
-                </p>
-              </CardHeader>
-
-              <CardBody>
-                <DownloadLinkList>
-                  <DownloadLink
-                    Icon={iconDownload}
-                    text={t('Downloads')}
-                    url="https://github.com/nextstrain/nextclade/releases"
-                  />
-                  <DownloadLink
-                    Icon={iconDocker}
-                    text={t('Docker')}
-                    url="https://hub.docker.com/r/nextstrain/nextalign"
-                  />
-                  <DownloadLink
-                    Icon={iconBook}
-                    text={t('Documentation')}
-                    url="https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextalign-cli.html"
-                  />
-                </DownloadLinkList>
-              </CardBody>
-            </Card>
-          </Col>
-
-          <Col lg={4}>
-            <Card>
-              <CardHeader>
-                <h4 className="text-center">{'Nextstrain'}</h4>
-                <p className="text-center">
-                  {t('our parent project, an open-source initiative to harness the potential of pathogen genome data')}
-                </p>
-              </CardHeader>
-
-              <CardBody>
-                <DownloadLinkList>
-                  <DownloadLink Icon={iconGlobe} text={'nextstrain.org'} url="https://nextstrain.org/" />
-                  <DownloadLink Icon={iconGithub} text={t('Source code')} url="https://github.com/nextstrain" />
-                  <DownloadLink Icon={iconBook} text={t('Documentation')} url="https://docs.nextstrain.org/" />
-                  <DownloadLink Icon={iconGlobe} text={'auspice.us'} url="https://auspice.us/" />
-                </DownloadLinkList>
-              </CardBody>
-            </Card>
-          </Col>
-        </Row>
-      </Col>
-    </Row>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/MainInputForm.tsx b/packages_rs/nextclade-web/src/components/Main/MainInputForm.tsx
index ae5c471e7..94b4b9e84 100644
--- a/packages_rs/nextclade-web/src/components/Main/MainInputForm.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/MainInputForm.tsx
@@ -1,55 +1,40 @@
-import React, { useMemo, useState } from 'react'
-
-import { useRecoilValue } from 'recoil'
-import { Container as ContainerBase } from 'reactstrap'
+import React from 'react'
+import { QuerySequenceFilePicker } from 'src/components/Main/QuerySequenceFilePicker'
 import styled from 'styled-components'
-
-import { DatasetSelector } from 'src/components/Main/DatasetSelector'
-import { MainInputFormRunStep } from 'src/components/Main/MainInputFormRunStep'
-import { datasetCurrentAtom } from 'src/state/dataset.state'
+import { Col as ColBase, Row as RowBase } from 'reactstrap'
 import { useUpdatedDatasetIndex } from 'src/io/fetchDatasets'
+import { DatasetSelector } from 'src/components/Main/DatasetSelector'
 
-export const Container = styled(ContainerBase)`
-  display: flex;
-  margin: 0;
-  padding: 0;
+const Container = styled.div`
+  height: 100%;
+  overflow: hidden;
+  margin-top: 10px;
 `
 
-export const Centered = styled.section`
-  margin: auto;
-
-  @media (min-width: 768px) {
-    min-width: 600px;
-  }
-
-  @media (max-width: 767.98px) {
-    margin: 0;
-    width: 100%;
-  }
+const Row = styled(RowBase)`
+  overflow: hidden;
+  height: 100%;
+`
 
-  max-width: 800px;
+const Col = styled(ColBase)`
+  overflow: hidden;
+  height: 100%;
 `
 
 export function MainInputForm() {
-  const [searchTerm, setSearchTerm] = useState('')
-  const currentDataset = useRecoilValue(datasetCurrentAtom)
-
   // This periodically fetches dataset index and updates the list of datasets.
   useUpdatedDatasetIndex()
 
-  const FormBody = useMemo(
-    () =>
-      currentDataset ? (
-        <MainInputFormRunStep />
-      ) : (
-        <DatasetSelector searchTerm={searchTerm} setSearchTerm={setSearchTerm} />
-      ),
-    [currentDataset, searchTerm],
-  )
-
   return (
-    <Container fluid>
-      <Centered>{FormBody}</Centered>
+    <Container>
+      <Row noGutters className="flex-column-reverse flex-lg-row">
+        <Col lg={6} className="">
+          <DatasetSelector />
+        </Col>
+        <Col lg={6} className="">
+          <QuerySequenceFilePicker />
+        </Col>
+      </Row>
     </Container>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Main/MainInputFormRunStep.tsx b/packages_rs/nextclade-web/src/components/Main/MainInputFormRunStep.tsx
deleted file mode 100644
index b6d6c250f..000000000
--- a/packages_rs/nextclade-web/src/components/Main/MainInputFormRunStep.tsx
+++ /dev/null
@@ -1,34 +0,0 @@
-import React from 'react'
-
-import { Col, Container, Row } from 'reactstrap'
-import styled from 'styled-components'
-
-import { MainInputFormSequenceFilePicker } from 'src/components/Main/MainInputFormSequenceFilePicker'
-import { DatasetCurrent } from './DatasetCurrent'
-
-const MainInputFormContainer = styled(Container)`
-  display: flex;
-  flex-direction: column;
-  width: 100%;
-  height: 100%;
-  margin: 0;
-  padding: 0;
-`
-
-export function MainInputFormRunStep() {
-  return (
-    <MainInputFormContainer fluid>
-      <Row noGutters>
-        <Col>
-          <DatasetCurrent />
-        </Col>
-      </Row>
-
-      <Row noGutters className="my-3">
-        <Col>
-          <MainInputFormSequenceFilePicker />
-        </Col>
-      </Row>
-    </MainInputFormContainer>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/MainInputFormSequenceFilePicker.tsx b/packages_rs/nextclade-web/src/components/Main/MainInputFormSequenceFilePicker.tsx
deleted file mode 100644
index 8b9a0f25a..000000000
--- a/packages_rs/nextclade-web/src/components/Main/MainInputFormSequenceFilePicker.tsx
+++ /dev/null
@@ -1,155 +0,0 @@
-import { noop } from 'lodash'
-import React, { useCallback, useMemo } from 'react'
-import { Button, Col, Form, FormGroup, Row } from 'reactstrap'
-import { useRecoilState, useRecoilValue } from 'recoil'
-import { MainInputFormSequencesCurrent } from 'src/components/Main/MainInputFormSequencesCurrent'
-import { useRunAnalysis } from 'src/hooks/useRunAnalysis'
-import { canRunAtom } from 'src/state/results.state'
-import styled from 'styled-components'
-
-import { datasetCurrentAtom } from 'src/state/dataset.state'
-import { hasInputErrorsAtom, qrySeqErrorAtom } from 'src/state/error.state'
-import { shouldRunAutomaticallyAtom } from 'src/state/settings.state'
-import type { AlgorithmInput } from 'src/types'
-import { Toggle } from 'src/components/Common/Toggle'
-import { FlexLeft, FlexRight } from 'src/components/FilePicker/FilePickerStyles'
-import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
-import { AlgorithmInputDefault } from 'src/io/AlgorithmInput'
-import { FilePicker } from 'src/components/FilePicker/FilePicker'
-import { FileIconFasta } from 'src/components/Common/FileIcons'
-import { hasRequiredInputsAtom, useQuerySeqInputs } from 'src/state/inputs.state'
-
-const SequenceFilePickerContainer = styled.section`
-  display: flex;
-  flex-direction: column;
-  width: 100%;
-  height: 100%;
-`
-
-const ButtonRunStyled = styled(Button)`
-  min-width: 160px;
-  min-height: 50px;
-  margin-left: 1rem;
-`
-
-export function MainInputFormSequenceFilePicker() {
-  const { t } = useTranslationSafe()
-
-  const datasetCurrent = useRecoilValue(datasetCurrentAtom)
-  const { qryInputs, addQryInputs } = useQuerySeqInputs()
-  const qrySeqError = useRecoilValue(qrySeqErrorAtom)
-
-  const canRun = useRecoilValue(canRunAtom)
-  const [shouldRunAutomatically, setShouldRunAutomatically] = useRecoilState(shouldRunAutomaticallyAtom)
-  const hasRequiredInputs = useRecoilValue(hasRequiredInputsAtom)
-  const hasInputErrors = useRecoilValue(hasInputErrorsAtom)
-
-  const icon = useMemo(() => <FileIconFasta />, [])
-
-  const run = useRunAnalysis()
-
-  const setSequences = useCallback(
-    (inputs: AlgorithmInput[]) => {
-      addQryInputs(inputs)
-
-      if (shouldRunAutomatically) {
-        run()
-      }
-    },
-    [addQryInputs, run, shouldRunAutomatically],
-  )
-
-  const setExampleSequences = useCallback(() => {
-    if (datasetCurrent) {
-      addQryInputs([new AlgorithmInputDefault(datasetCurrent)])
-
-      if (shouldRunAutomatically) {
-        run()
-      }
-    }
-  }, [addQryInputs, datasetCurrent, run, shouldRunAutomatically])
-
-  const { isRunButtonDisabled, runButtonColor, runButtonTooltip } = useMemo(() => {
-    const isRunButtonDisabled = !(canRun && hasRequiredInputs) || hasInputErrors
-    return {
-      isRunButtonDisabled,
-      runButtonColor: isRunButtonDisabled ? 'secondary' : 'success',
-      runButtonTooltip: isRunButtonDisabled
-        ? t('Please provide input files for the algorithm')
-        : t('Launch the algorithm!'),
-    }
-  }, [canRun, hasInputErrors, hasRequiredInputs, t])
-
-  const LoadExampleLink = useMemo(() => {
-    const cannotLoadExample = hasInputErrors || !datasetCurrent
-    return (
-      <Button color="link" onClick={setExampleSequences} disabled={cannotLoadExample}>
-        {t('Load example')}
-      </Button>
-    )
-  }, [datasetCurrent, hasInputErrors, setExampleSequences, t])
-
-  const onToggleRunAutomatically = useCallback(() => {
-    setShouldRunAutomatically((shouldRunAutomatically) => !shouldRunAutomatically)
-  }, [setShouldRunAutomatically])
-
-  const headerText = useMemo(() => {
-    if (qryInputs.length > 0) {
-      return t('Add more sequence data')
-    }
-    return t('Provide sequence data')
-  }, [qryInputs.length, t])
-
-  return (
-    <SequenceFilePickerContainer>
-      <MainInputFormSequencesCurrent />
-
-      <FilePicker
-        className="my-3"
-        title={headerText}
-        icon={icon}
-        exampleUrl="https://example.com/sequences.fasta"
-        pasteInstructions={t('Enter sequence data in FASTA format')}
-        input={undefined}
-        error={qrySeqError}
-        isInProgress={false}
-        onRemove={noop}
-        onInputs={setSequences}
-        multiple
-      />
-
-      <Row noGutters className="mt-2">
-        <Col className="w-100 d-flex">
-          <FlexLeft>
-            <Form className="d-flex h-100 mt-1">
-              <FormGroup className="my-auto">
-                <Toggle
-                  identifier="toggle-run-automatically"
-                  checked={shouldRunAutomatically}
-                  onCheckedChanged={onToggleRunAutomatically}
-                >
-                  <span title="Run Nextclade automatically after sequence data is provided">
-                    {t('Run automatically')}
-                  </span>
-                </Toggle>
-              </FormGroup>
-            </Form>
-          </FlexLeft>
-
-          <FlexRight>
-            {LoadExampleLink}
-
-            <ButtonRunStyled
-              disabled={isRunButtonDisabled}
-              color={runButtonColor}
-              onClick={run}
-              title={runButtonTooltip}
-            >
-              {t('Run')}
-            </ButtonRunStyled>
-          </FlexRight>
-        </Col>
-      </Row>
-    </SequenceFilePickerContainer>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/MainInputFormSequencesCurrent.tsx b/packages_rs/nextclade-web/src/components/Main/MainInputFormSequencesCurrent.tsx
deleted file mode 100644
index feef13229..000000000
--- a/packages_rs/nextclade-web/src/components/Main/MainInputFormSequencesCurrent.tsx
+++ /dev/null
@@ -1,111 +0,0 @@
-import React, { useCallback, useMemo } from 'react'
-import { Button, Col, Container, Row } from 'reactstrap'
-import styled from 'styled-components'
-import { ImCross } from 'react-icons/im'
-import { rgba } from 'polished'
-
-import { AlgorithmInput } from 'src/types'
-import { ButtonTransparent } from 'src/components/Common/ButtonTransparent'
-import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
-import { useQuerySeqInputs } from 'src/state/inputs.state'
-
-const SequencesCurrentWrapper = styled(Container)`
-  border: 1px #ccc9 solid;
-  border-radius: 5px;
-`
-
-const InputFileInfoWrapper = styled.section`
-  box-shadow: ${(props) => `1px 1px 5px ${rgba(props.theme.black, 0.1)}`};
-  border: 1px #ccc9 solid;
-  border-radius: 5px;
-  margin: 0.5rem 0;
-  padding: 0.5rem 1rem;
-`
-
-export interface InputFileInfoProps {
-  input: AlgorithmInput
-  index: number
-}
-
-export function InputFileInfo({ input, index }: InputFileInfoProps) {
-  const { t } = useTranslationSafe()
-  const { removeQryInput } = useQuerySeqInputs()
-  const onRemoveClicked = useCallback(() => {
-    removeQryInput(index)
-  }, [index, removeQryInput])
-
-  return (
-    <InputFileInfoWrapper>
-      <Row noGutters className="d-flex">
-        <Col className="flex-grow-1">{input.description}</Col>
-        <ButtonTransparent title={t('Remove this input')} onClick={onRemoveClicked}>
-          <ImCross />
-        </ButtonTransparent>
-      </Row>
-    </InputFileInfoWrapper>
-  )
-}
-
-export function MainInputFormSequencesCurrent() {
-  const { t } = useTranslationSafe()
-  const { qryInputs, clearQryInputs } = useQuerySeqInputs()
-
-  const inputComponents = useMemo(
-    () => (
-      <Row noGutters>
-        <Col>
-          {qryInputs.map((input, index) => (
-            // eslint-disable-next-line react/no-array-index-key
-            <InputFileInfo key={`${input.name} ${index}`} input={input} index={index} />
-          ))}
-        </Col>
-      </Row>
-    ),
-    [qryInputs],
-  )
-
-  const removeButton = useMemo(
-    () =>
-      qryInputs.length > 0 ? (
-        <Row noGutters>
-          <Col className="d-flex w-100">
-            <Button className="ml-auto" color="link" onClick={clearQryInputs} title={t('Remove all input files')}>
-              {t('Remove all')}
-            </Button>
-          </Col>
-        </Row>
-      ) : null,
-
-    [clearQryInputs, qryInputs.length, t],
-  )
-  const headerText = useMemo(() => {
-    if (qryInputs.length === 0) {
-      return null
-    }
-    return (
-      <Row noGutters>
-        <Col>
-          <h4>{t("Sequence data you've added")}</h4>
-        </Col>
-      </Row>
-    )
-  }, [qryInputs.length, t])
-
-  if (qryInputs.length === 0) {
-    return null
-  }
-
-  return (
-    <section className="my-2">
-      {headerText}
-      <SequencesCurrentWrapper>
-        <Row noGutters>
-          <Col>
-            {inputComponents}
-            {removeButton}
-          </Col>
-        </Row>
-      </SequencesCurrentWrapper>
-    </section>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/MainPage.tsx b/packages_rs/nextclade-web/src/components/Main/MainPage.tsx
index 1af901864..be7f8592c 100644
--- a/packages_rs/nextclade-web/src/components/Main/MainPage.tsx
+++ b/packages_rs/nextclade-web/src/components/Main/MainPage.tsx
@@ -1,19 +1,12 @@
 import React from 'react'
 
-import { LayoutMain } from 'src/components/Layout/LayoutMain'
-
+import { Layout } from 'src/components/Layout/Layout'
 import { MainInputForm } from 'src/components/Main/MainInputForm'
-import { MainSectionInfo } from 'src/components/Main/MainSectionInfo'
-import { MainSectionTitle } from 'src/components/Main/MainSectionTitle'
-import { TeamCredits } from 'src/components/Team/TeamCredits'
 
 export function MainPage() {
   return (
-    <LayoutMain>
-      <MainSectionTitle />
+    <Layout>
       <MainInputForm />
-      <MainSectionInfo />
-      <TeamCredits />
-    </LayoutMain>
+    </Layout>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Main/MainSectionInfo.tsx b/packages_rs/nextclade-web/src/components/Main/MainSectionInfo.tsx
deleted file mode 100644
index 47669bece..000000000
--- a/packages_rs/nextclade-web/src/components/Main/MainSectionInfo.tsx
+++ /dev/null
@@ -1,26 +0,0 @@
-import React from 'react'
-
-import { Col, Row } from 'reactstrap'
-
-import { About } from 'src/components/About/About'
-import { Downloads } from 'src/components/Main/Downloads'
-
-export function MainSectionInfo() {
-  return (
-    <Row noGutters className="mx-2 mt-3 main-info-section">
-      <Col>
-        <Row noGutters>
-          <Col>
-            <Downloads />
-          </Col>
-        </Row>
-
-        <Row noGutters className="mt-3 mx-auto">
-          <Col>
-            <About />
-          </Col>
-        </Row>
-      </Col>
-    </Row>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/MainSectionTitle.tsx b/packages_rs/nextclade-web/src/components/Main/MainSectionTitle.tsx
deleted file mode 100644
index b50920bbe..000000000
--- a/packages_rs/nextclade-web/src/components/Main/MainSectionTitle.tsx
+++ /dev/null
@@ -1,19 +0,0 @@
-import React from 'react'
-
-import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe'
-import { Col, Row } from 'reactstrap'
-
-import { Subtitle, Title } from 'src/components/Main/Title'
-
-export function MainSectionTitle() {
-  const { t } = useTranslation()
-
-  return (
-    <Row noGutters className="hero-bg text-center mb-lg-3 mb-sm-2">
-      <Col>
-        <Title />
-        <Subtitle>{t('Clade assignment, mutation calling, and sequence quality checks')}</Subtitle>
-      </Col>
-    </Row>
-  )
-}
diff --git a/packages_rs/nextclade-web/src/components/Main/QuerySequenceFilePicker.tsx b/packages_rs/nextclade-web/src/components/Main/QuerySequenceFilePicker.tsx
new file mode 100644
index 000000000..0c69f6f48
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Main/QuerySequenceFilePicker.tsx
@@ -0,0 +1,104 @@
+import React, { useCallback, useMemo } from 'react'
+import { useRecoilValue } from 'recoil'
+import styled from 'styled-components'
+import type { AlgorithmInput } from 'src/types'
+import { QuerySequenceList } from 'src/components/Main/QuerySequenceList'
+import { RunPanel } from 'src/components/Main/RunPanel'
+import { useRunAnalysis } from 'src/hooks/useRunAnalysis'
+import { useRunSeqAutodetect } from 'src/hooks/useRunSeqAutodetect'
+import { useRecoilToggle } from 'src/hooks/useToggle'
+import { qrySeqErrorAtom } from 'src/state/error.state'
+import { shouldRunAutomaticallyAtom, shouldSuggestDatasetsAtom } from 'src/state/settings.state'
+import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import { FilePicker } from 'src/components/FilePicker/FilePicker'
+import { FileIconFasta } from 'src/components/Common/FileIcons'
+import { useQuerySeqInputs } from 'src/state/inputs.state'
+
+/** Input area for query sequences: file picker, list of the added inputs, and the run panel. */
+export function QuerySequenceFilePicker() {
+  const { t } = useTranslationSafe()
+
+  const { qryInputs, addQryInputs } = useQuerySeqInputs()
+  const qrySeqError = useRecoilValue(qrySeqErrorAtom)
+
+  const { state: shouldRunAutomatically } = useRecoilToggle(shouldRunAutomaticallyAtom)
+  const shouldSuggestDatasets = useRecoilValue(shouldSuggestDatasetsAtom)
+
+  const icon = useMemo(() => <FileIconFasta />, [])
+  const runAnalysis = useRunAnalysis()
+  const runAutodetect = useRunSeqAutodetect()
+
+  // Stores the newly picked inputs, then starts dataset suggestion and/or the analysis if those settings are on.
+  const setSequences = useCallback(
+    (inputs: AlgorithmInput[]) => {
+      addQryInputs(inputs)
+      if (shouldSuggestDatasets) {
+        runAutodetect()
+      }
+      if (shouldRunAutomatically) {
+        runAnalysis()
+      }
+    },
+    [addQryInputs, runAnalysis, runAutodetect, shouldRunAutomatically, shouldSuggestDatasets],
+  )
+
+  // The picker title changes once at least one input has been added.
+  const headerText = useMemo(
+    () => (qryInputs.length > 0 ? t('Add more sequence data') : t('Provide sequence data')),
+    [qryInputs.length, t],
+  )
+
+  return (
+    <Container>
+      <Header>
+        <FilePicker
+          title={headerText}
+          icon={icon}
+          exampleUrl="https://example.com/sequences.fasta"
+          pasteInstructions={t('Enter sequence data in FASTA format')}
+          input={undefined}
+          error={qrySeqError}
+          isInProgress={false}
+          onInputs={setSequences}
+          multiple
+        />
+      </Header>
+
+      <Main>
+        <QuerySequenceList />
+      </Main>
+
+      <Footer>
+        <RunPanel />
+      </Footer>
+    </Container>
+  )
+}
+
+const Container = styled.div`
+  display: flex;
+  flex: 1;
+  flex-direction: column;
+  height: 100%;
+  overflow: hidden;
+  margin-left: 10px;
+  margin-right: 12px;
+`
+
+const Header = styled.div`
+  display: flex;
+  flex: 0;
+  margin-bottom: 15px;
+`
+
+const Main = styled.div`
+  display: flex;
+  flex: 1;
+  flex-direction: column;
+  overflow: hidden;
+`
+
+const Footer = styled.div`
+  display: flex;
+  flex: 0;
+`
diff --git a/packages_rs/nextclade-web/src/components/Main/QuerySequenceList.tsx b/packages_rs/nextclade-web/src/components/Main/QuerySequenceList.tsx
new file mode 100644
index 000000000..61b0f91eb
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Main/QuerySequenceList.tsx
@@ -0,0 +1,88 @@
+import React, { useCallback, useMemo } from 'react'
+import { Button } from 'reactstrap'
+import styled, { useTheme } from 'styled-components'
+import { ImCross } from 'react-icons/im'
+import { AlgorithmInput } from 'src/types'
+import { ButtonTransparent } from 'src/components/Common/ButtonTransparent'
+import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import { useQuerySeqInputs } from 'src/state/inputs.state'
+import { UlGeneric } from '../Common/List'
+
+/** Lists the query sequence inputs added so far, with a "Remove all" button; renders nothing when empty. */
+export function QuerySequenceList() {
+  const { t } = useTranslationSafe()
+  const { qryInputs, clearQryInputs } = useQuerySeqInputs()
+
+  // The array index is forwarded to each row because `InputFileInfo` removes its input by position.
+  const listItems = useMemo(
+    () =>
+      qryInputs.map((input, index) => (
+        <Li key={input.uid}>
+          <InputFileInfo input={input} index={index} />
+        </Li>
+      )),
+    [qryInputs],
+  )
+
+  // Title row with the "Remove all" action; there is no header when nothing has been added.
+  const headerText = useMemo(
+    () =>
+      qryInputs.length === 0 ? null : (
+        <div className="d-flex">
+          <h4>{t("Sequence data you've added")}</h4>
+          <Button className="ml-auto" color="link" onClick={clearQryInputs} title={t('Remove all input files')}>
+            {t('Remove all')}
+          </Button>
+        </div>
+      ),
+    [clearQryInputs, qryInputs.length, t],
+  )
+
+  return qryInputs.length === 0 ? null : (
+    <>
+      {headerText}
+      <Ul>{listItems}</Ul>
+    </>
+  )
+}
+
+export const Ul = styled(UlGeneric)`
+  flex: 1;
+  overflow: auto;
+`
+
+export const Li = styled.li`
+  margin: 5px 0;
+  border-radius: 5px !important;
+`
+
+export interface InputFileInfoProps {
+  input: AlgorithmInput
+  index: number
+}
+
+/** One row of the query-input list: the input's description plus a button that removes that input. */
+export function InputFileInfo({ input, index }: InputFileInfoProps) {
+  const { t } = useTranslationSafe()
+  const theme = useTheme()
+  const { removeQryInput } = useQuerySeqInputs()
+  // `index` is the position the parent list assigned to this input; removal is done by that position
+  const onRemoveClicked = useCallback(() => removeQryInput(index), [index, removeQryInput])
+
+  return (
+    <Container>
+      <h6 className="flex-grow-1 my-auto">{input.description}</h6>
+      <ButtonTransparent title={t('Remove this input')} onClick={onRemoveClicked}>
+        <ImCross color={theme.gray500} />
+      </ButtonTransparent>
+    </Container>
+  )
+}
+
+const Container = styled.section`
+  display: flex;
+  padding: 0.5rem 1rem;
+  box-shadow: 0 0 12px 0 #0002;
+  border: 1px #ccc9 solid;
+  border-radius: 5px;
+`
diff --git a/packages_rs/nextclade-web/src/components/Main/RunPanel.tsx b/packages_rs/nextclade-web/src/components/Main/RunPanel.tsx
new file mode 100644
index 000000000..9e573dc0d
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Main/RunPanel.tsx
@@ -0,0 +1,132 @@
+import React, { useCallback, useMemo } from 'react'
+import styled from 'styled-components'
+import { Button, Form as FormBase, FormGroup } from 'reactstrap'
+import { useRecoilValue } from 'recoil'
+import { useRunAnalysis } from 'src/hooks/useRunAnalysis'
+import { useRunSeqAutodetect } from 'src/hooks/useRunSeqAutodetect'
+import { useRecoilToggle } from 'src/hooks/useToggle'
+import { canRunAtom } from 'src/state/results.state'
+import { datasetCurrentAtom } from 'src/state/dataset.state'
+import { hasInputErrorsAtom } from 'src/state/error.state'
+import { shouldRunAutomaticallyAtom, shouldSuggestDatasetsAtom } from 'src/state/settings.state'
+import { Toggle } from 'src/components/Common/Toggle'
+import { FlexLeft, FlexRight } from 'src/components/FilePicker/FilePickerStyles'
+import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import { AlgorithmInputDefault } from 'src/io/AlgorithmInput'
+import { hasRequiredInputsAtom, useQuerySeqInputs } from 'src/state/inputs.state'
+
+/** Panel with the "Run automatically" toggle, the "Load example" link and the "Run" button. */
+export function RunPanel() {
+  const { t } = useTranslationSafe()
+
+  const datasetCurrent = useRecoilValue(datasetCurrentAtom)
+  const { addQryInputs } = useQuerySeqInputs()
+
+  const canRun = useRecoilValue(canRunAtom)
+  const { state: shouldRunAutomatically, toggle: toggleRunAutomatically } = useRecoilToggle(shouldRunAutomaticallyAtom)
+  const shouldSuggestDatasets = useRecoilValue(shouldSuggestDatasetsAtom)
+
+  const hasRequiredInputs = useRecoilValue(hasRequiredInputsAtom)
+  const hasInputErrors = useRecoilValue(hasInputErrorsAtom)
+
+  const runAnalysis = useRunAnalysis()
+  const runAutodetect = useRunSeqAutodetect()
+
+  // "Load example": adds an `AlgorithmInputDefault` built from the current dataset; does nothing without a dataset.
+  const setExampleSequences = useCallback(() => {
+    if (!datasetCurrent) {
+      return
+    }
+    addQryInputs([new AlgorithmInputDefault(datasetCurrent)])
+    if (shouldSuggestDatasets) {
+      runAutodetect()
+    }
+    if (shouldRunAutomatically) {
+      runAnalysis()
+    }
+  }, [addQryInputs, datasetCurrent, runAnalysis, runAutodetect, shouldRunAutomatically, shouldSuggestDatasets])
+
+  // The "Run" button is disabled (and grey) unless a run is possible, inputs are present and there are no errors.
+  const { isRunButtonDisabled, runButtonColor, runButtonTooltip } = useMemo(() => {
+    const disabled = !canRun || !hasRequiredInputs || hasInputErrors
+    const color = disabled ? 'secondary' : 'success'
+    const tooltip = disabled ? t('Please provide sequence data for the algorithm') : t('Launch the algorithm!')
+    return { isRunButtonDisabled: disabled, runButtonColor: color, runButtonTooltip: tooltip }
+  }, [canRun, hasInputErrors, hasRequiredInputs, t])
+
+  return (
+    <Container>
+      <Form>
+        <FlexLeft>
+          <FormGroup>
+            <Toggle
+              identifier="toggle-run-automatically"
+              checked={shouldRunAutomatically}
+              onCheckedChanged={toggleRunAutomatically}
+            >
+              <span title={t('Run Nextclade automatically after sequence data is provided')}>
+                {t('Run automatically')}
+              </span>
+            </Toggle>
+          </FormGroup>
+        </FlexLeft>
+
+        <FlexRight>
+          <Button color="link" onClick={setExampleSequences} disabled={hasInputErrors || !datasetCurrent}>
+            {t('Load example')}
+          </Button>
+
+          <ButtonRunStyled
+            disabled={isRunButtonDisabled}
+            color={runButtonColor}
+            onClick={runAnalysis}
+            title={runButtonTooltip}
+          >
+            {t('Run')}
+          </ButtonRunStyled>
+        </FlexRight>
+      </Form>
+    </Container>
+  )
+}
+
+const Container = styled.div`
+  flex: 1;
+  margin-top: auto;
+  margin-bottom: 7px;
+  padding: 7px 0;
+  padding-right: 5px;
+`
+
+const Form = styled(FormBase)`
+  display: flex;
+  width: 100%;
+  height: 100%;
+  margin-top: auto;
+  padding: 10px;
+  border: 1px #ccc9 solid;
+  border-radius: 5px;
+`
+
+// const Container = styled.div`
+//   flex: 1;
+//   margin-top: auto;
+//   margin-bottom: 7px;
+//   padding: 10px;
+//   padding-right: 5px;
+//   box-shadow: 0 3px 20px 3px #0003;
+// `
+//
+// const Form = styled(FormBase)`
+//   display: flex;
+//   width: 100%;
+//   height: 100%;
+//   padding: 10px;
+//   border: 1px #ccc9 solid;
+//   border-radius: 5px;
+// `
+
+const ButtonRunStyled = styled(Button)`
+  min-width: 150px;
+  min-height: 45px;
+`
diff --git a/packages_rs/nextclade-web/src/components/Main/SuggestionPanel.tsx b/packages_rs/nextclade-web/src/components/Main/SuggestionPanel.tsx
new file mode 100644
index 000000000..8d9800c17
--- /dev/null
+++ b/packages_rs/nextclade-web/src/components/Main/SuggestionPanel.tsx
@@ -0,0 +1,101 @@
+import { isNil } from 'lodash'
+import React, { useMemo } from 'react'
+import { useRunSeqAutodetect } from 'src/hooks/useRunSeqAutodetect'
+import { hasRequiredInputsAtom } from 'src/state/inputs.state'
+import styled from 'styled-components'
+import { Button, Form as FormBase, FormGroup } from 'reactstrap'
+import { useRecoilValue, useResetRecoilState } from 'recoil'
+import { Toggle } from 'src/components/Common/Toggle'
+import { FlexLeft, FlexRight } from 'src/components/FilePicker/FilePickerStyles'
+import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
+import { useRecoilToggle } from 'src/hooks/useToggle'
+import { autodetectResultsAtom, hasAutodetectResultsAtom } from 'src/state/autodetect.state'
+import { minimizerIndexVersionAtom } from 'src/state/dataset.state'
+import { shouldSuggestDatasetsAtom } from 'src/state/settings.state'
+
+/** Panel with the dataset auto-suggestion toggle, a "Reset suggestions" link and a "Suggest" button. */
+export function SuggestionPanel() {
+  const { t } = useTranslationSafe()
+  const minimizerIndexVersion = useRecoilValue(minimizerIndexVersionAtom)
+  const resetAutodetectResults = useResetRecoilState(autodetectResultsAtom)
+  const hasAutodetectResults = useRecoilValue(hasAutodetectResultsAtom)
+  const hasRequiredInputs = useRecoilValue(hasRequiredInputsAtom)
+  const runSuggest = useRunSeqAutodetect()
+
+  const { canRun, runButtonColor, runButtonTooltip } = useMemo(() => {
+    const color = hasRequiredInputs ? 'success' : 'secondary'
+    const tooltip = hasRequiredInputs
+      ? t('Launch suggestions engine!')
+      : t('Please provide sequence data for the algorithm')
+    return { canRun: hasRequiredInputs, runButtonColor: color, runButtonTooltip: tooltip }
+  }, [hasRequiredInputs, t])
+
+  if (isNil(minimizerIndexVersion)) {
+    return null
+  }
+
+  return (
+    <Container>
+      <Form>
+        <FlexLeft>
+          <AutosuggestionToggle />
+        </FlexLeft>
+
+        <FlexRight>
+          <Button color="link" onClick={resetAutodetectResults} disabled={!hasAutodetectResults}>
+            {t('Reset suggestions')}
+          </Button>
+
+          <ButtonRunStyled onClick={runSuggest} disabled={!canRun} color={runButtonColor} title={runButtonTooltip}>
+            {t('Suggest')}
+          </ButtonRunStyled>
+        </FlexRight>
+      </Form>
+    </Container>
+  )
+}
+
+const Container = styled.div`
+  flex: 1;
+  margin-top: auto;
+  margin-bottom: 7px;
+  padding: 7px 0;
+  padding-left: 5px;
+`
+
+const Form = styled(FormBase)`
+  display: flex;
+  width: 100%;
+  height: 100%;
+  margin-top: auto;
+  padding: 10px;
+  border: 1px #ccc9 solid;
+  border-radius: 5px;
+`
+
+const ButtonRunStyled = styled(Button)`
+  min-width: 150px;
+  min-height: 45px;
+`
+
+/** Toggle bound to `shouldSuggestDatasetsAtom`, labelled "Suggest automatically". */
+function AutosuggestionToggle() {
+  const { t } = useTranslationSafe()
+  const { state: shouldSuggestDatasets, toggle: toggleSuggestDatasets } = useRecoilToggle(shouldSuggestDatasetsAtom)
+
+  const tooltip = t(
+    'Enable suggestion of best matching pathogen datasets. Please add sequence data to launch suggestion engine.',
+  )
+
+  return (
+    <FormGroup>
+      <Toggle
+        identifier="toggle-suggest-datasets"
+        checked={shouldSuggestDatasets}
+        onCheckedChanged={toggleSuggestDatasets}
+      >
+        <span title={tooltip}>{t('Suggest automatically')}</span>
+      </Toggle>
+    </FormGroup>
+  )
+}
diff --git a/packages_rs/nextclade-web/src/components/Main/Title.tsx b/packages_rs/nextclade-web/src/components/Main/Title.tsx
deleted file mode 100644
index 1f8caf49b..000000000
--- a/packages_rs/nextclade-web/src/components/Main/Title.tsx
+++ /dev/null
@@ -1,82 +0,0 @@
-import React from 'react'
-
-import styled from 'styled-components'
-
-import { TITLE_COLORS } from 'src/constants'
-
-// eslint-disable-next-line prefer-destructuring
-const PACKAGE_VERSION = process.env.PACKAGE_VERSION
-
-// Borrowed with modifications from Nextstrain.org
-// https://github.com/nextstrain/nextstrain.org/blob/master/static-site/src/components/splash/title.jsx
-
-const TitleH1 = styled.h1`
-  display: inline;
-  margin-top: 0px;
-  margin-bottom: 0px;
-  font-weight: 300;
-  letter-spacing: -1px;
-  font-size: 6rem;
-
-  @media (max-width: 767.98px) {
-    font-size: 5rem;
-  }
-
-  @media (max-width: 576px) {
-    font-size: 3.5rem;
-  }
-`
-
-const VersionNumberBadge = styled.p`
-  display: inline;
-  font-size: 0.85rem;
-  color: #7b838a;
-
-  @media (max-width: 767.98px) {
-    left: -35px;
-    font-size: 0.8rem;
-  }
-
-  @media (max-width: 576px) {
-    left: -30px;
-    font-size: 0.75rem;
-  }
-`
-
-const LetterSpan = styled.span<{ pos: number }>`
-  color: ${(props) => TITLE_COLORS[props.pos]};
-`
-
-export function Title() {
-  return (
-    <span>
-      <TitleH1>
-        {'Nextclade'.split('').map((letter, i) => (
-          // eslint-disable-next-line react/no-array-index-key
-          <LetterSpan key={`${i}_${letter}`} pos={i}>
-            {letter}
-          </LetterSpan>
-        ))}
-      </TitleH1>
-      {PACKAGE_VERSION && <VersionNumberBadge color="secondary">{`v${PACKAGE_VERSION}`}</VersionNumberBadge>}
-    </span>
-  )
-}
-
-export const Subtitle = styled.p`
-  text-align: center;
-  font-size: 2rem;
-  font-weight: 300;
-
-  @media (max-width: 991.98px) {
-    font-size: 1.5rem;
-  }
-
-  @media (max-width: 767.98px) {
-    font-size: 1.2rem;
-  }
-
-  @media (max-width: 576px) {
-    font-size: 1rem;
-  }
-`
diff --git a/packages_rs/nextclade-web/src/components/Results/ExportDialogButton.tsx b/packages_rs/nextclade-web/src/components/Results/ExportDialogButton.tsx
index 3dc636898..6a9987130 100644
--- a/packages_rs/nextclade-web/src/components/Results/ExportDialogButton.tsx
+++ b/packages_rs/nextclade-web/src/components/Results/ExportDialogButton.tsx
@@ -34,9 +34,7 @@ import {
 import {
   DEFAULT_EXPORT_PARAMS,
   useExportCsv,
-  useExportErrorsCsv,
   useExportFasta,
-  useExportInsertionsCsv,
   useExportJson,
   useExportNdjson,
   useExportPeptides,
@@ -121,8 +119,6 @@ export interface ExportParams {
   filenameTreeNwk: string
   filenameFasta: string
   filenamePeptidesZip: string
-  filenameInsertionsCsv: string
-  filenameErrorsCsv: string
   filenamePeptidesTemplate: string
 }
 
@@ -213,8 +209,6 @@ export function DownloadListDialog({ toggleColumnConfigOpen }: DownloadListDialo
   const exportPeptides = useExportPeptides()
   const exportTree = useExportTree()
   const exportTreeNwk = useExportTreeNwk()
-  const exportInsertionsCsv = useExportInsertionsCsv()
-  const exportErrorsCsv = useExportErrorsCsv()
 
   const exportParams = useMemo(() => DEFAULT_EXPORT_PARAMS, [])
 
@@ -318,26 +312,6 @@ export function DownloadListDialog({ toggleColumnConfigOpen }: DownloadListDialo
         onDownload={exportPeptides}
       />
 
-      <ExportFileElement
-        Icon={FileIconCsv}
-        filename={exportParams.filenameInsertionsCsv}
-        HelpMain={t('Insertions in CSV format.')}
-        HelpDetails={t('Contains insertions stripped from aligned sequences.')}
-        HelpDownload={t('Download insertions in CSV format')}
-        onDownload={exportInsertionsCsv}
-      />
-
-      <ExportFileElement
-        Icon={FileIconCsv}
-        filename={exportParams.filenameErrorsCsv}
-        HelpMain={t('Errors, warnings, and failed genes in CSV format.')}
-        HelpDetails={t(
-          'Contains a list of errors, a list of warnings and a list of genes that failed processing, per sequence, in CSV format.',
-        )}
-        HelpDownload={t('Download warnings, and failed genes in CSV format')}
-        onDownload={exportErrorsCsv}
-      />
-
       <ExportFileElement
         Icon={FileIconZip}
         filename={exportParams.filenameZip}
diff --git a/packages_rs/nextclade-web/src/components/Results/ResultsPage.tsx b/packages_rs/nextclade-web/src/components/Results/ResultsPage.tsx
index 9a287e957..a5cb13c0a 100644
--- a/packages_rs/nextclade-web/src/components/Results/ResultsPage.tsx
+++ b/packages_rs/nextclade-web/src/components/Results/ResultsPage.tsx
@@ -3,7 +3,7 @@ import { useRecoilValue } from 'recoil'
 import styled from 'styled-components'
 
 import { resultsTableTotalWidthAtom } from 'src/state/settings.state'
-import { LayoutResults } from 'src/components/Layout/LayoutResults'
+import { Layout } from 'src/components/Layout/Layout'
 import { GeneMapTable } from 'src/components/GeneMap/GeneMapTable'
 import { ExportDialogButton } from 'src/components/Results/ExportDialogButton'
 import { ButtonNewRun } from 'src/components/Results/ButtonNewRun'
@@ -77,7 +77,7 @@ export function ResultsPage() {
   const totalWidth = useRecoilValue(resultsTableTotalWidthAtom)
 
   return (
-    <LayoutResults>
+    <Layout>
       <Container>
         <Header>
           <HeaderLeft>
@@ -121,6 +121,6 @@ export function ResultsPage() {
           </WrapperInner>
         </WrapperOuter>
       </Container>
-    </LayoutResults>
+    </Layout>
   )
 }
diff --git a/packages_rs/nextclade-web/src/components/Tree/TreePage.tsx b/packages_rs/nextclade-web/src/components/Tree/TreePage.tsx
index f9e42fac6..a20642f02 100644
--- a/packages_rs/nextclade-web/src/components/Tree/TreePage.tsx
+++ b/packages_rs/nextclade-web/src/components/Tree/TreePage.tsx
@@ -7,7 +7,7 @@ import FiltersSummary from 'auspice/src/components/info/filtersSummary'
 
 import type { State } from 'src/state/reducer'
 import i18nAuspice from 'src/i18n/i18n.auspice'
-import { LayoutResults } from 'src/components/Layout/LayoutResults'
+import { Layout } from 'src/components/Layout/Layout'
 import { LogoGisaid as LogoGisaidBase } from 'src/components/Common/LogoGisaid'
 import { ButtonBack } from 'src/components/Results/ButtonBack'
 import { Tree } from './Tree'
@@ -100,7 +100,7 @@ function TreePageDisconnected({ treeMeta }: TreePageProps) {
   )
 
   return (
-    <LayoutResults>
+    <Layout>
       <Container>
         <Header>
           <HeaderLeft>
@@ -135,7 +135,7 @@ function TreePageDisconnected({ treeMeta }: TreePageProps) {
           </AuspiceContainer>
         </MainContent>
       </Container>
-    </LayoutResults>
+    </Layout>
   )
 }
 
diff --git a/packages_rs/nextclade-web/src/constants.ts b/packages_rs/nextclade-web/src/constants.ts
index 1b9d7841e..e8eb6b34d 100644
--- a/packages_rs/nextclade-web/src/constants.ts
+++ b/packages_rs/nextclade-web/src/constants.ts
@@ -31,6 +31,8 @@ export const URL_GITHUB_COMMITS = 'https://github.com/nextstrain/nextclade/commi
 export const URL_CLADE_SCHEMA_REPO = 'https://github.com/nextstrain/ncov-clades-schema/'
 export const URL_CLADE_SCHEMA_SVG = 'https://raw.githubusercontent.com/nextstrain/ncov-clades-schema/master/clades.svg'
 
+export const URL_GITHUB_DATA_RAW = 'https://raw.githubusercontent.com/nextstrain/nextclade_data' as const
+
 export const SUPPORT_EMAIL = 'hello@nextstrain.org'
 
 export const TWITTER_USERNAME_RAW = 'nextstrain' as const
diff --git a/packages_rs/nextclade-web/src/helpers/colorHash.ts b/packages_rs/nextclade-web/src/helpers/colorHash.ts
new file mode 100644
index 000000000..79c91fad5
--- /dev/null
+++ b/packages_rs/nextclade-web/src/helpers/colorHash.ts
@@ -0,0 +1,215 @@
+/* eslint-disable no-param-reassign,no-plusplus,no-loops/no-loops,prefer-destructuring,no-else-return,unicorn/prefer-code-point */
+
+/**
+ * Color Hash
+ * by Simone Piccian (zanza00)
+ * taken with modifications from
+ * https://github.com/zanza00/color-hash/blob/6d43fa1b103fa090e1f0d788f5bfc4e99bf02263/src/color-hash.ts
+ */
+
+/**
+ * BKDR Hash (modified version): folds the UTF-16 code units of a string into a single number
+ *
+ * @param {String} str string to hash
+ * @returns {Number} the accumulated hash value
+ */
+export function BKDRHash(str: string): number {
+  const multiplier = 131
+  const divisor = 137
+  // once the accumulator exceeds this limit it is divided by `divisor` before the next multiplication
+  const limit = Number.MAX_SAFE_INTEGER / divisor
+  // the appended 'x' makes the hash more sensitive for short strings like 'a', 'b', 'c'
+  const padded = `${str}x`
+
+  let acc = 0
+  for (let pos = 0; pos < padded.length; pos += 1) {
+    const scaled = acc > limit ? Math.floor(acc / divisor) : acc
+    acc = scaled * multiplier + padded.charCodeAt(pos)
+  }
+  return acc
+}
+
+/**
+ * Convert RGB Array to HEX
+ *
+ * Each channel is rendered in base 16 and gets a leading '0' when its value is below 16.
+ *
+ * @param {Array} RGBArray - [R, G, B]
+ * @returns {String} 6 digits hex starting with #
+ */
+function RGB2HEX(RGBArray: [number, number, number]): string {
+  const channels = RGBArray.map((value) => {
+    const digits = value.toString(16)
+    // values below 16 have a single hex digit, so a leading zero is added
+    return value < 16 ? `0${digits}` : digits
+  })
+  return `#${channels.join('')}`
+}
+
+type func = (p: number, q: number) => (color: number) => number
+// Builds the per-channel converter used by HSL2RGB; `p` and `q` are the two levels it interpolates between.
+const paramToColor: func = (p, q) => (color) => {
+  // shift the argument up by one when it is below 0, then down by one when it is above 1
+  let hue = color < 0 ? color + 1 : color
+  hue = hue > 1 ? hue - 1 : hue
+
+  let level: number
+  if (hue < 1 / 6) {
+    level = p + (q - p) * 6 * hue
+  } else if (hue < 0.5) {
+    level = q
+  } else if (hue < 2 / 3) {
+    level = p + (q - p) * 6 * (2 / 3 - hue)
+  } else {
+    level = p
+  }
+  return Math.round(level * 255)
+}
+
+/**
+ * Convert an HSL color to an [R, G, B] triple
+ *
+ * @see {@link https://en.wikipedia.org/wiki/HSL_and_HSV} for further information.
+ * @param {Number} H Hue ∈ [0, 360)
+ * @param {Number} S Saturation ∈ [0, 1]
+ * @param {Number} L Lightness ∈ [0, 1]
+ * @returns {Array} R, G, B ∈ [0, 255]
+ */
+function HSL2RGB(H: number, S: number, L: number): [number, number, number] {
+  const hue = H / 360
+  const upper = L < 0.5 ? L * (1 + S) : L + S - L * S
+  const lower = 2 * L - upper
+  const toChannel = paramToColor(lower, upper)
+  const red = toChannel(hue + 1 / 3)
+  const green = toChannel(hue)
+  const blue = toChannel(hue - 1 / 3)
+  return [red, green, blue]
+}
+
+export { HSL2RGB as testFroHSL2RGB }
+
+export type Options = {
+  lightness?: number | number[]
+  saturation?: number | number[]
+  hue?: number | { min: number; max: number } | { min: number; max: number }[]
+  hash?: typeof BKDRHash
+}
+
+/**
+ * Color Hash Class: derives a deterministic color (HSL, RGB or hex) from the hash of a string
+ *
+ * @class
+ */
+class ColorHash {
+  // candidate lightness values; `hsl()` picks one of them based on the hash
+  private L: number[]
+  // candidate saturation values; `hsl()` picks one of them based on the hash
+  private S: number[]
+  // allowed hue intervals; when empty, the hue is taken as `hash % 359`
+  private hueRanges: { min: number; max: number }[]
+  // string hash function; BKDRHash unless `options.hash` is given
+  private hash: (str: string) => number
+
+  constructor(options: Options = {}) {
+    const toList = (param: number | number[]) => (Array.isArray(param) ? param.concat() : [param])
+    this.L = toList(options.lightness ?? [0.35, 0.5, 0.65])
+    this.S = toList(options.saturation ?? [0.35, 0.5, 0.65])
+
+    // Normalize `hue` into a list of ranges using a local variable, so the caller's `options` is not mutated
+    const { hue } = options
+    let ranges: { min: number; max: number }[]
+    if (typeof hue === 'undefined') {
+      ranges = []
+    } else if (typeof hue === 'number') {
+      ranges = [{ min: hue, max: hue }]
+    } else if (Array.isArray(hue)) {
+      ranges = hue
+    } else {
+      ranges = [hue]
+    }
+    this.hueRanges = ranges.map((range) => {
+      return {
+        min: typeof range.min === 'undefined' ? 0 : range.min,
+        max: typeof range.max === 'undefined' ? 360 : range.max,
+      }
+    })
+
+    this.hash = options.hash ?? BKDRHash
+  }
+
+  // Maps a hash to a hue: inside one of `hueRanges` when any are configured, otherwise `hash % 359`
+  private getHue(hash: number): number {
+    if (this.hueRanges.length > 0) {
+      const range = this.hueRanges[hash % this.hueRanges.length]
+      const hueResolution = 727 // note that 727 is a prime
+      return (((hash / this.hueRanges.length) % hueResolution) * (range.max - range.min)) / hueResolution + range.min
+    } else {
+      return hash % 359 // note that 359 is a prime
+    }
+  }
+
+  /**
+   * Returns the hash in [h, s, l].
+   * Note that H ∈ [0, 360); S ∈ [0, 1]; L ∈ [0, 1];
+   *
+   * @param {String} str string to hash
+   * @returns {Array} [h, s, l]
+   */
+  hsl(str: string): [number, number, number] {
+    const hash = this.hash(str)
+    const H = this.getHue(hash)
+    // the hash is integer-divided step by step to index the saturation list, then the lightness list
+    const sHash = Math.floor(hash / 360)
+    const S = this.S[sHash % this.S.length]
+    const lHash = Math.floor(sHash / this.S.length)
+    const L = this.L[lHash % this.L.length]
+    return [H, S, L]
+  }
+
+  /**
+   * Returns the hash in [r, g, b].
+   * Note that R, G, B ∈ [0, 255]
+   *
+   * @param {String} str string to hash
+   * @returns {Array} [r, g, b]
+   */
+  rgb(str: string): [number, number, number] {
+    const hsl = this.hsl(str)
+    return HSL2RGB(...hsl)
+  }
+
+  /**
+   * Returns the hash in hex
+   *
+   * @param {String} str string to hash
+   * @returns {String} hex with #
+   */
+  hex(str: string): string {
+    const rgb = this.rgb(str)
+    return RGB2HEX(rgb)
+  }
+}
+
+export interface ColorHashOptions extends Options {
+  reverse?: boolean
+  prefix?: string
+  suffix?: string
+}
+
+/**
+ * Returns a hex color for a string
+ *
+ * @param {String} content string to derive the color from
+ * @param {Object} options ColorHash options, plus `reverse`, `prefix` and `suffix` applied to `content` first
+ * @returns {String} hex with #
+ */
+export function colorHash(content: string, options?: ColorHashOptions) {
+  // reversal comes first, so the prefix and suffix themselves are never reversed
+  const base = options?.reverse ? content.split('').reverse().join('') : content
+
+  // an empty or missing prefix/suffix leaves the string untouched
+  const withPrefix = options?.prefix ? `${options.prefix}${base}` : base
+  const withSuffix = options?.suffix ? `${withPrefix}${options.suffix}` : withPrefix
+
+  return new ColorHash(options).hex(withSuffix)
+}
diff --git a/packages_rs/nextclade-web/src/helpers/number.ts b/packages_rs/nextclade-web/src/helpers/number.ts
index 009cf5e23..e50f45095 100644
--- a/packages_rs/nextclade-web/src/helpers/number.ts
+++ b/packages_rs/nextclade-web/src/helpers/number.ts
@@ -4,3 +4,14 @@ export function ensureNumber(x?: boolean | number | null): number {
   }
   return x
 }
+
+/** Returns `true` only for integers divisible by 2. Non-integers (including `NaN` and infinities) are not even. */
+export function isEven(x: number): boolean {
+  return Number.isInteger(x) && x % 2 === 0
+}
+
+/**
+ * Returns `true` only for integers which are not divisible by 2.
+ *
+ * Non-integers (including `NaN` and infinities) are neither even nor odd, so the result is `false` for them,
+ * rather than being the plain negation of `isEven()`.
+ */
+export function isOdd(x: number): boolean {
+  // `x % 2` is `-1` for negative odd integers, hence the comparison against 0 rather than 1
+  return Number.isInteger(x) && x % 2 !== 0
+}
diff --git a/packages_rs/nextclade-web/src/helpers/string.ts b/packages_rs/nextclade-web/src/helpers/string.ts
new file mode 100644
index 000000000..05647c8f8
--- /dev/null
+++ b/packages_rs/nextclade-web/src/helpers/string.ts
@@ -0,0 +1,47 @@
+/* eslint-disable no-plusplus,no-loops/no-loops */
+import { sortBy } from 'lodash'
+
+/**
+ * Splits a string into its overlapping bigrams (pairs of adjacent characters).
+ *
+ * A string of length `n` yields `n - 1` bigrams; strings shorter than 2 characters yield an empty array.
+ *
+ * @param s string to split
+ * @returns bigrams in order of appearance, e.g. `'abcd'` gives `['ab', 'bc', 'cd']`
+ */
+function getBigrams(s: string): string[] {
+  const count = Math.max(s.length - 1, 0)
+  // The end index must move together with the start index: a fixed end of 2 only works for the first bigram
+  return Array.from({ length: count }, (_, i) => s.slice(i, i + 2))
+}
+
+/**
+ * Computes a bigram-based Sorensen-Dice similarity score of two strings.
+ *
+ * The score is `2 * shared / (n1 + n2)`, where `n1` and `n2` are the string lengths minus one, and `shared` is the
+ * number of bigrams of `s1` that can be paired with a not yet paired equal bigram of `s2`.
+ * Returns 0 when either string is shorter than 2 characters.
+ */
+export function sorensenDice(s1: string, s2: string) {
+  const numBigrams1 = s1.length - 1
+  const numBigrams2 = s2.length - 1
+  if (numBigrams1 < 1 || numBigrams2 < 1) {
+    return 0
+  }
+
+  const left: (string | null)[] = getBigrams(s1)
+  const right: (string | null)[] = getBigrams(s2)
+
+  let shared = 0
+  for (let i = 0; i < numBigrams1; i++) {
+    // Only the first `numBigrams2` entries participate in matching
+    const matchIndex = right.findIndex((bigram, j) => j < numBigrams2 && bigram === left[i])
+    if (matchIndex !== -1) {
+      shared++
+      // Mark the entry as consumed, so that it cannot be paired again
+      right[matchIndex] = null
+    }
+  }
+
+  return (2.0 * shared) / (numBigrams1 + numBigrams2)
+}
+
+/**
+ * Finds strings in `haystack` which are similar to `needle` according to `sorensenDice()`.
+ *
+ * @returns candidates with a score above 0, ordered from the highest score to the lowest
+ */
+export function findSimilarStrings(haystack: string[], needle: string): string[] {
+  const scored = haystack.map((candidate) => ({ candidate, score: sorensenDice(candidate, needle) }))
+  const matching = scored.filter((entry) => entry.score > 0.0)
+  // Negated score turns lodash's ascending sort into a descending one
+  const ranked = sortBy(matching, (entry) => -entry.score)
+  return ranked.map((entry) => entry.candidate)
+}
+
+/**
+ * Returns the first character of the string whose lowercase form contains a letter in the `a-z` range
+ * (in its original case), or `undefined` if there is no such character.
+ */
+export function firstLetter(s: string): string | undefined {
+  for (const char of s.split('')) {
+    if (/[a-z]/.test(char.toLowerCase())) {
+      return char
+    }
+  }
+  return undefined
+}
diff --git a/packages_rs/nextclade-web/src/helpers/uniqueId.ts b/packages_rs/nextclade-web/src/helpers/uniqueId.ts
new file mode 100644
index 000000000..e1525d609
--- /dev/null
+++ b/packages_rs/nextclade-web/src/helpers/uniqueId.ts
@@ -0,0 +1,5 @@
+import { nanoid } from 'nanoid'
+
+/** Generates a new identifier string by delegating to `nanoid()` */
+export function uniqueId(): string {
+  return nanoid()
+}
diff --git a/packages_rs/nextclade-web/src/hooks/useExportResults.ts b/packages_rs/nextclade-web/src/hooks/useExportResults.ts
index 2d3fb2ef8..16a86edec 100644
--- a/packages_rs/nextclade-web/src/hooks/useExportResults.ts
+++ b/packages_rs/nextclade-web/src/hooks/useExportResults.ts
@@ -1,7 +1,7 @@
-/* eslint-disable no-void,unicorn/no-await-expression-member,no-loops/no-loops,sonarjs/no-duplicate-string */
+/* eslint-disable no-void,unicorn/no-await-expression-member,no-loops/no-loops */
 import { Snapshot, useRecoilCallback } from 'recoil'
 
-import type { AnalysisError, AnalysisOutput, ErrorsFromWeb } from 'src/types'
+import type { AnalysisError, AnalysisOutput } from 'src/types'
 import type { ExportParams } from 'src/components/Results/ExportDialogButton'
 import { ErrorInternal } from 'src/helpers/ErrorInternal'
 import { notUndefinedOrNull } from 'src/helpers/notUndefined'
@@ -30,8 +30,6 @@ export const DEFAULT_EXPORT_PARAMS: ExportParams = {
   filenameTreeNwk: 'nextclade.nwk',
   filenameFasta: 'nextclade.aligned.fasta',
   filenamePeptidesZip: 'nextclade.peptides.fasta.zip',
-  filenameInsertionsCsv: 'nextclade.insertions.csv',
-  filenameErrorsCsv: 'nextclade.errors.csv',
   filenamePeptidesTemplate: 'nextclade.peptide.{{GENE}}.fasta',
 }
 
@@ -189,54 +187,6 @@ export function useExportTreeNwk() {
   })
 }
 
-async function prepareInsertionsCsv(snapshot: Snapshot, worker: ExportWorker) {
-  const results = await mapGoodResults(snapshot, (result) => result.analysisResult)
-  const errors = await mapErrors(snapshot, (err) => err)
-  return worker.serializeInsertionsCsv(results, errors)
-}
-
-export function useExportInsertionsCsv() {
-  return useResultsExport(async (filename, snapshot, worker) => {
-    const csvStr = await prepareInsertionsCsv(snapshot, worker)
-    saveFile(csvStr, filename, 'text/csv;charset=utf-8')
-  })
-}
-
-async function prepareErrorsCsv(snapshot: Snapshot, worker: ExportWorker) {
-  const results = await snapshot.getPromise(analysisResultsAtom)
-
-  const errors: ErrorsFromWeb[] = results.map(({ seqName, result, error }) => {
-    if (result) {
-      return {
-        seqName,
-        errors: '',
-        failedGenes: result.analysisResult.missingGenes,
-        warnings: result.analysisResult.warnings,
-      }
-    }
-
-    if (error) {
-      return {
-        seqName,
-        errors: error,
-        failedGenes: [],
-        warnings: [],
-      }
-    }
-
-    throw new ErrorInternal('When preparing errors for export: Expected either result or error to be non-nil')
-  })
-
-  return worker.serializeErrorsCsv(errors)
-}
-
-export function useExportErrorsCsv() {
-  return useResultsExport(async (filename, snapshot, worker) => {
-    const csvStr = await prepareErrorsCsv(snapshot, worker)
-    saveFile(csvStr, filename, 'text/csv;charset=utf-8')
-  })
-}
-
 async function preparePeptideFiles(snapshot: Snapshot) {
   const peptides = await mapGoodResults(snapshot, ({ translation, analysisResult: { seqName } }) => ({
     seqName,
@@ -279,8 +229,6 @@ export function useExportZip() {
     const treeJsonStr = await prepareOutputTree(snapshot)
     const treeNwkStr = await prepareOutputTreeNwk(snapshot)
     const fastaStr = await prepareOutputFasta(snapshot)
-    const insertionsCsvStr = await prepareInsertionsCsv(snapshot, worker)
-    const errorsCsvStr = await prepareErrorsCsv(snapshot, worker)
     const peptideFiles = await preparePeptideFiles(snapshot)
 
     const files: ZipFileDescription[] = [
@@ -291,8 +239,6 @@ export function useExportZip() {
       { filename: DEFAULT_EXPORT_PARAMS.filenameTree, data: treeJsonStr },
       { filename: DEFAULT_EXPORT_PARAMS.filenameTree, data: treeNwkStr },
       { filename: DEFAULT_EXPORT_PARAMS.filenameFasta, data: fastaStr },
-      { filename: DEFAULT_EXPORT_PARAMS.filenameInsertionsCsv, data: insertionsCsvStr },
-      { filename: DEFAULT_EXPORT_PARAMS.filenameErrorsCsv, data: errorsCsvStr },
     ]
 
     await saveZip({ filename, files })
diff --git a/packages_rs/nextclade-web/src/hooks/useRunAnalysis.ts b/packages_rs/nextclade-web/src/hooks/useRunAnalysis.ts
index 395bda249..daeed6c10 100644
--- a/packages_rs/nextclade-web/src/hooks/useRunAnalysis.ts
+++ b/packages_rs/nextclade-web/src/hooks/useRunAnalysis.ts
@@ -12,7 +12,6 @@ import { datasetCurrentAtom } from 'src/state/dataset.state'
 import { globalErrorAtom } from 'src/state/error.state'
 import {
   geneMapInputAtom,
-  qcConfigInputAtom,
   qrySeqInputsStorageAtom,
   refSeqInputAtom,
   refTreeInputAtom,
@@ -59,7 +58,6 @@ export function useRunAnalysis() {
           refSeq: getPromise(refSeqInputAtom),
           geneMap: getPromise(geneMapInputAtom),
           tree: getPromise(refTreeInputAtom),
-          qcConfig: getPromise(qcConfigInputAtom),
           virusProperties: getPromise(virusPropertiesInputAtom),
         }
 
diff --git a/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts
new file mode 100644
index 000000000..fb1ce5837
--- /dev/null
+++ b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts
@@ -0,0 +1,108 @@
+import type { Subscription } from 'observable-fns'
+import { useRecoilCallback } from 'recoil'
+import { ErrorInternal } from 'src/helpers/ErrorInternal'
+import { axiosFetch } from 'src/io/axiosFetch'
+import {
+  autodetectResultByIndexAtom,
+  autodetectResultsAtom,
+  AutodetectRunState,
+  autodetectRunStateAtom,
+  minimizerIndexAtom,
+} from 'src/state/autodetect.state'
+import { minimizerIndexVersionAtom } from 'src/state/dataset.state'
+import { globalErrorAtom } from 'src/state/error.state'
+import { MinimizerIndexJson, MinimizerSearchRecord } from 'src/types'
+import { qrySeqInputsStorageAtom } from 'src/state/inputs.state'
+import { getQueryFasta } from 'src/workers/launchAnalysis'
+import { NextcladeSeqAutodetectWasmWorker } from 'src/workers/nextcladeAutodetect.worker'
+import { spawn } from 'src/workers/spawn'
+
+/**
+ * Returns a callback which starts dataset autodetection for the current query sequences.
+ *
+ * The callback clears the state of the previous run, marks the run as started, fetches the minimizer index and
+ * launches the autodetect worker. Results are written to `autodetectResultByIndexAtom` as they arrive.
+ * Failures (from the preparation steps as well as from the worker) set the run state to `Failed` and are
+ * reported through `globalErrorAtom`.
+ */
+export function useRunSeqAutodetect() {
+  return useRecoilCallback(
+    ({ set, reset, snapshot }) =>
+      () => {
+        const { getPromise } = snapshot
+
+        // Clear the state of the previous run first and only then mark the new run as started.
+        // Doing it in the opposite order would immediately discard the `Started` state.
+        reset(minimizerIndexAtom)
+        reset(autodetectResultsAtom)
+        reset(autodetectRunStateAtom)
+        set(autodetectRunStateAtom, AutodetectRunState.Started)
+
+        function onResult(results: MinimizerSearchRecord[]) {
+          results.forEach((res) => {
+            set(autodetectResultByIndexAtom(res.fastaRecord.index), res)
+          })
+        }
+
+        function onError(error: Error) {
+          set(autodetectRunStateAtom, AutodetectRunState.Failed)
+          set(globalErrorAtom, error)
+        }
+
+        function onComplete() {
+          set(autodetectRunStateAtom, AutodetectRunState.Done)
+        }
+
+        Promise.all([getPromise(qrySeqInputsStorageAtom), getPromise(minimizerIndexVersionAtom)])
+          .then(async ([qrySeqInputs, minimizerIndexVersion]) => {
+            if (!minimizerIndexVersion) {
+              throw new ErrorInternal('Tried to run minimizer search without minimizer index available')
+            }
+            const fasta = await getQueryFasta(qrySeqInputs)
+            const minimizerIndex: MinimizerIndexJson = await axiosFetch(minimizerIndexVersion.path)
+            set(minimizerIndexAtom, minimizerIndex)
+            return runAutodetect(fasta, minimizerIndex, { onResult, onError, onComplete })
+          })
+          // Nobody awaits this promise chain: rethrowing here would only produce an unhandled rejection.
+          // Report the failure the same way as errors coming from the worker.
+          .catch(onError)
+      },
+    [],
+  )
+}
+
+/** Handlers which `SeqAutodetectWasmWorker.autodetect()` subscribes to the stream of values coming from the worker */
+interface Callbacks {
+  /** Receives each batch of search records emitted by the worker */
+  onResult: (r: MinimizerSearchRecord[]) => void
+  /** Optional: passed as the error handler of the subscription */
+  onError?: (error: Error) => void
+  /** Optional: passed as the completion handler of the subscription */
+  onComplete?: () => void
+}
+
+/**
+ * Creates an autodetect worker for the given minimizer index, runs it on the FASTA string and destroys it.
+ *
+ * @param fasta query sequences in FASTA format
+ * @param minimizerIndex minimizer index to initialize the worker with
+ * @param callbacks handlers receiving results, errors and the completion signal from the worker
+ */
+async function runAutodetect(fasta: string, minimizerIndex: MinimizerIndexJson, callbacks: Callbacks) {
+  const worker = await SeqAutodetectWasmWorker.create(minimizerIndex)
+  try {
+    await worker.autodetect(fasta, callbacks)
+  } finally {
+    // Release the worker even when the run fails, otherwise the spawned thread is leaked
+    await worker.destroy()
+  }
+}
+
+/**
+ * Wrapper around the autodetect web worker thread.
+ *
+ * Instances are obtained through the async `create()` factory (the constructor is private), used through
+ * `autodetect()` and released with `destroy()`.
+ */
+export class SeqAutodetectWasmWorker {
+  // Assigned in `init()`, which `create()` always awaits before handing out the instance
+  private thread!: NextcladeSeqAutodetectWasmWorker
+  // Subscription to the worker's stream of results. Set by `autodetect()`.
+  private subscription?: Subscription<MinimizerSearchRecord[]>
+
+  private constructor() {}
+
+  /** Constructs an instance and initializes it with the given minimizer index */
+  static async create(minimizerIndex: MinimizerIndexJson) {
+    const self = new SeqAutodetectWasmWorker()
+    await self.init(minimizerIndex)
+    return self
+  }
+
+  /** Spawns the worker thread and passes the minimizer index to its `create()` method */
+  async init(minimizerIndex: MinimizerIndexJson) {
+    this.thread = await spawn<NextcladeSeqAutodetectWasmWorker>(
+      new Worker(new URL('src/workers/nextcladeAutodetect.worker.ts', import.meta.url), {
+        name: 'nextcladeAutodetectWorker',
+      }),
+    )
+
+    await this.thread.create(minimizerIndex)
+  }
+
+  /** Subscribes the callbacks to the values emitted by the thread, then runs autodetection on the FASTA string */
+  async autodetect(fastaStr: string, { onResult, onError, onComplete }: Callbacks) {
+    // Subscribe before starting the run
+    this.subscription = this.thread.values().subscribe(onResult, onError, onComplete)
+    await this.thread.autodetect(fastaStr)
+  }
+
+  /** Cancels the subscription (if any) and destroys the worker thread */
+  async destroy() {
+    this.subscription?.unsubscribe()
+    await this.thread.destroy()
+  }
+}
diff --git a/packages_rs/nextclade-web/src/hooks/useToggle.ts b/packages_rs/nextclade-web/src/hooks/useToggle.ts
index 1b51e6038..da562079c 100644
--- a/packages_rs/nextclade-web/src/hooks/useToggle.ts
+++ b/packages_rs/nextclade-web/src/hooks/useToggle.ts
@@ -1,4 +1,5 @@
 import { useCallback, useState } from 'react'
+import { RecoilState, useRecoilState } from 'recoil'
 
 export type VoidFunc = () => void
 
@@ -9,3 +10,11 @@ export function useToggle(initialState = false): [boolean, VoidFunc, VoidFunc, V
   const disable = useCallback(() => setState(false), [])
   return [state, toggle, enable, disable]
 }
+
+/**
+ * Same idea as `useToggle()`, but the boolean lives in the given Recoil state instead of local component state.
+ *
+ * @returns the current value, its raw setter, and memoized `toggle`, `enable` and `disable` callbacks
+ */
+export function useRecoilToggle(recoilState: RecoilState<boolean>) {
+  const [state, setState] = useRecoilState(recoilState)
+
+  const toggle = useCallback(() => {
+    setState((previous) => !previous)
+  }, [setState])
+
+  const enable = useCallback(() => {
+    setState(true)
+  }, [setState])
+
+  const disable = useCallback(() => {
+    setState(false)
+  }, [setState])
+
+  return { state, setState, toggle, enable, disable }
+}
diff --git a/packages_rs/nextclade-web/src/i18n/i18n.ts b/packages_rs/nextclade-web/src/i18n/i18n.ts
index 7dfa189e8..abb25b55c 100644
--- a/packages_rs/nextclade-web/src/i18n/i18n.ts
+++ b/packages_rs/nextclade-web/src/i18n/i18n.ts
@@ -1,11 +1,7 @@
-import { ElementType, FC } from 'react'
-
 import type { StrictOmit } from 'ts-essentials'
 import { get, isNil, mapValues } from 'lodash'
-
 import i18nOriginal, { i18n as I18N, Resource } from 'i18next'
 import { initReactI18next } from 'react-i18next'
-
 import { Settings as LuxonSettings } from 'luxon'
 import numbro from 'numbro'
 import { languages } from 'countries-list'
@@ -15,28 +11,6 @@ import prettyBytesOriginal, { Options as PrettyBytesOptionsOriginal } from 'pret
 // @ts-ignore
 import numbroLanguages from 'numbro/dist/languages.min'
 
-import CN from 'flag-icon-css/flags/1x1/cn.svg'
-import DE from 'flag-icon-css/flags/1x1/de.svg'
-import ES from 'flag-icon-css/flags/1x1/es.svg'
-import FR from 'flag-icon-css/flags/1x1/fr.svg'
-import GB from 'flag-icon-css/flags/1x1/gb.svg'
-import GR from 'flag-icon-css/flags/1x1/gr.svg'
-import ID from 'flag-icon-css/flags/1x1/id.svg'
-import IL from 'flag-icon-css/flags/1x1/il.svg'
-import IN from 'flag-icon-css/flags/1x1/in.svg'
-import IR from 'flag-icon-css/flags/1x1/ir.svg'
-import IT from 'flag-icon-css/flags/1x1/it.svg'
-import JP from 'flag-icon-css/flags/1x1/jp.svg'
-import KR from 'flag-icon-css/flags/1x1/kr.svg'
-import NL from 'flag-icon-css/flags/1x1/nl.svg'
-import PK from 'flag-icon-css/flags/1x1/pk.svg'
-import PT from 'flag-icon-css/flags/1x1/pt.svg'
-import RU from 'flag-icon-css/flags/1x1/ru.svg'
-import SA from 'flag-icon-css/flags/1x1/sa.svg'
-import TH from 'flag-icon-css/flags/1x1/th.svg'
-import TR from 'flag-icon-css/flags/1x1/tr.svg'
-import VN from 'flag-icon-css/flags/1x1/vn.svg'
-
 import ar from './resources/ar/common.json'
 import de from './resources/de/common.json'
 import el from './resources/el/common.json'
@@ -97,32 +71,32 @@ export interface Locale {
   readonly full: string
   readonly name: string
   readonly native: string
-  readonly Flag: ElementType
+  readonly rtl: number | undefined
 }
 
 export const locales: Record<LocaleKey, Locale> = {
-  en: { key: 'en', full: 'en-US', name: languages.en.name, native: languages.en.native, Flag: GB as FC },
-  ar: { key: 'ar', full: 'ar-SA', name: languages.ar.name, native: languages.ar.native, Flag: SA as FC },
-  de: { key: 'de', full: 'de-DE', name: languages.de.name, native: languages.de.native, Flag: DE as FC },
-  el: { key: 'el', full: 'el-GR', name: languages.el.name, native: languages.el.native, Flag: GR as FC },
-  es: { key: 'es', full: 'es-ES', name: languages.es.name, native: languages.es.native, Flag: ES as FC },
-  fa: { key: 'fa', full: 'fa-IR', name: languages.fa.name, native: languages.fa.native, Flag: IR as FC },
-  fr: { key: 'fr', full: 'fr-FR', name: languages.fr.name, native: languages.fr.native, Flag: FR as FC },
-  he: { key: 'he', full: 'he-IL', name: languages.he.name, native: languages.he.native, Flag: IL as FC },
-  hi: { key: 'hi', full: 'hi-IN', name: languages.hi.name, native: languages.hi.native, Flag: IN as FC },
-  id: { key: 'id', full: 'id-ID', name: languages.id.name, native: languages.id.native, Flag: ID as FC },
-  it: { key: 'it', full: 'it-IT', name: languages.it.name, native: languages.it.native, Flag: IT as FC },
-  ja: { key: 'ja', full: 'ja-JP', name: languages.ja.name, native: languages.ja.native, Flag: JP as FC },
-  ko: { key: 'ko', full: 'ko-KR', name: languages.ko.name, native: languages.ko.native, Flag: KR as FC },
-  nl: { key: 'nl', full: 'nl-NL', name: languages.nl.name, native: languages.nl.native, Flag: NL as FC },
-  pt: { key: 'pt', full: 'pt-PT', name: languages.pt.name, native: languages.pt.native, Flag: PT as FC },
-  ru: { key: 'ru', full: 'ru-RU', name: languages.ru.name, native: languages.ru.native, Flag: RU as FC },
-  ta: { key: 'ta', full: 'ta-IN', name: languages.ta.name, native: languages.ta.native, Flag: IN as FC },
-  th: { key: 'th', full: 'th-TH', name: languages.th.name, native: languages.th.native, Flag: TH as FC },
-  tr: { key: 'tr', full: 'tr-TR', name: languages.tr.name, native: languages.tr.native, Flag: TR as FC },
-  ur: { key: 'ur', full: 'ur-PK', name: languages.ur.name, native: languages.ur.native, Flag: PK as FC },
-  vi: { key: 'vi', full: 'vi-VN', name: languages.vi.name, native: languages.vi.native, Flag: VN as FC },
-  zh: { key: 'zh', full: 'zh-CN', name: languages.zh.name, native: languages.zh.native, Flag: CN as FC },
+  en: { key: 'en', full: 'en-US', name: languages.en.name, native: languages.en.native, rtl: languages.en.rtl },
+  ar: { key: 'ar', full: 'ar-SA', name: languages.ar.name, native: languages.ar.native, rtl: languages.ar.rtl },
+  de: { key: 'de', full: 'de-DE', name: languages.de.name, native: languages.de.native, rtl: languages.de.rtl },
+  el: { key: 'el', full: 'el-GR', name: languages.el.name, native: languages.el.native, rtl: languages.el.rtl },
+  es: { key: 'es', full: 'es-ES', name: languages.es.name, native: languages.es.native, rtl: languages.es.rtl },
+  fa: { key: 'fa', full: 'fa-IR', name: languages.fa.name, native: languages.fa.native, rtl: languages.fa.rtl },
+  fr: { key: 'fr', full: 'fr-FR', name: languages.fr.name, native: languages.fr.native, rtl: languages.fr.rtl },
+  he: { key: 'he', full: 'he-IL', name: languages.he.name, native: languages.he.native, rtl: languages.he.rtl },
+  hi: { key: 'hi', full: 'hi-IN', name: languages.hi.name, native: languages.hi.native, rtl: languages.hi.rtl },
+  id: { key: 'id', full: 'id-ID', name: languages.id.name, native: languages.id.native, rtl: languages.id.rtl },
+  it: { key: 'it', full: 'it-IT', name: languages.it.name, native: languages.it.native, rtl: languages.it.rtl },
+  ja: { key: 'ja', full: 'ja-JP', name: languages.ja.name, native: languages.ja.native, rtl: languages.ja.rtl },
+  ko: { key: 'ko', full: 'ko-KR', name: languages.ko.name, native: languages.ko.native, rtl: languages.ko.rtl },
+  nl: { key: 'nl', full: 'nl-NL', name: languages.nl.name, native: languages.nl.native, rtl: languages.nl.rtl },
+  pt: { key: 'pt', full: 'pt-PT', name: languages.pt.name, native: languages.pt.native, rtl: languages.pt.rtl },
+  ru: { key: 'ru', full: 'ru-RU', name: languages.ru.name, native: languages.ru.native, rtl: languages.ru.rtl },
+  ta: { key: 'ta', full: 'ta-IN', name: languages.ta.name, native: languages.ta.native, rtl: languages.ta.rtl },
+  th: { key: 'th', full: 'th-TH', name: languages.th.name, native: languages.th.native, rtl: languages.th.rtl },
+  tr: { key: 'tr', full: 'tr-TR', name: languages.tr.name, native: languages.tr.native, rtl: languages.tr.rtl },
+  ur: { key: 'ur', full: 'ur-PK', name: languages.ur.name, native: languages.ur.native, rtl: languages.ur.rtl },
+  vi: { key: 'vi', full: 'vi-VN', name: languages.vi.name, native: languages.vi.native, rtl: languages.vi.rtl },
+  zh: { key: 'zh', full: 'zh-CN', name: languages.zh.name, native: languages.zh.native, rtl: languages.zh.rtl },
 } as const
 
 export const localeKeys = Object.keys(locales)
diff --git a/packages_rs/nextclade-web/src/io/AlgorithmInput.ts b/packages_rs/nextclade-web/src/io/AlgorithmInput.ts
index 427e3c76e..ea6e33e8f 100644
--- a/packages_rs/nextclade-web/src/io/AlgorithmInput.ts
+++ b/packages_rs/nextclade-web/src/io/AlgorithmInput.ts
@@ -1,6 +1,6 @@
+import { uniqueId } from 'src/helpers/uniqueId'
 import { AlgorithmInput, AlgorithmInputType, Dataset } from 'src/types'
 import { axiosFetchRaw } from 'src/io/axiosFetch'
-
 import { readFile } from 'src/helpers/readFile'
 import { numbro } from 'src/i18n/i18n'
 
@@ -16,12 +16,20 @@ function formatBytes(bytes: number) {
 }
 
 export class AlgorithmInputFile implements AlgorithmInput {
+  public readonly uid = uniqueId()
+  public readonly path: string
   public readonly type: AlgorithmInputType = AlgorithmInputType.File as const
-
   private readonly file: File
 
   constructor(file: File) {
     this.file = file
+
+    // eslint-disable-next-line unicorn/prefer-ternary
+    if (this.file.webkitRelativePath.trim().length > 0) {
+      this.path = this.file.webkitRelativePath
+    } else {
+      this.path = `${this.uid}-${this.file.name}`
+    }
   }
 
   public get name(): string {
@@ -38,12 +46,14 @@ export class AlgorithmInputFile implements AlgorithmInput {
 }
 
 export class AlgorithmInputUrl implements AlgorithmInput {
+  public readonly uid = uniqueId()
+  public readonly path: string
   public readonly type: AlgorithmInputType = AlgorithmInputType.Url as const
-
   private readonly url: string
 
   constructor(url: string) {
     this.url = url
+    this.path = this.url
   }
 
   public get name(): string {
@@ -60,12 +70,14 @@ export class AlgorithmInputUrl implements AlgorithmInput {
 }
 
 export class AlgorithmInputString implements AlgorithmInput {
+  public readonly uid = uniqueId()
+  public readonly path: string
   public readonly type: AlgorithmInputType = AlgorithmInputType.String as const
-
   private readonly content: string
   private readonly contentName: string
 
   constructor(content: string, contentName?: string) {
+    this.path = `pasted-${this.uid}.fasta`
     this.content = content
     this.contentName = contentName ?? 'Pasted sequences'
   }
@@ -84,24 +96,25 @@ export class AlgorithmInputString implements AlgorithmInput {
 }
 
 export class AlgorithmInputDefault implements AlgorithmInput {
+  public readonly uid = uniqueId()
+  public readonly path: string
   public readonly type: AlgorithmInputType = AlgorithmInputType.Default as const
-
   public dataset: Dataset
 
   constructor(dataset: Dataset) {
     this.dataset = dataset
+    this.path = `Examples for '${this.dataset.path}'`
   }
 
   public get name(): string {
-    const { value, valueFriendly } = this.dataset.attributes.name
-    return `${valueFriendly ?? value} example sequences`
+    return this.path
   }
 
   public get description(): string {
-    return `${this.name}`
+    return this.name
   }
 
   public async getContent(): Promise<string> {
-    return axiosFetchRaw(this.dataset.files['sequences.fasta'])
+    return axiosFetchRaw(this.dataset.files.examples)
   }
 }
diff --git a/packages_rs/nextclade-web/src/io/axiosFetch.ts b/packages_rs/nextclade-web/src/io/axiosFetch.ts
index 19d6eea17..32daf0dc5 100644
--- a/packages_rs/nextclade-web/src/io/axiosFetch.ts
+++ b/packages_rs/nextclade-web/src/io/axiosFetch.ts
@@ -68,20 +68,25 @@ export async function axiosFetchRawMaybe(url?: string): Promise<string | undefin
   return axiosFetchRaw(url)
 }
 
-export async function axiosHead<TData = unknown>(
-  url: string | undefined,
-  options?: AxiosRequestConfig,
-): Promise<TData> {
+export async function axiosHead(url: string | undefined, options?: AxiosRequestConfig): Promise<AxiosResponse> {
   if (isNil(url)) {
     throw new ErrorInternal(`Attempted to fetch from an invalid URL: '${url}'`)
   }
 
-  let res
   try {
-    res = await axios.head(url, options)
+    return await axios.head(url, options)
   } catch (error) {
     throw axios.isAxiosError(error) ? new HttpRequestError(error) : sanitizeError(error)
   }
+}
 
-  return res.data as TData
+export async function axiosHeadOrUndefined(
+  url: string | undefined,
+  options?: AxiosRequestConfig,
+): Promise<AxiosResponse | undefined> {
+  try {
+    return await axiosHead(url, options)
+  } catch {
+    return undefined
+  }
 }
diff --git a/packages_rs/nextclade-web/src/io/fetchDatasets.ts b/packages_rs/nextclade-web/src/io/fetchDatasets.ts
index 46eb0da9b..be86b86cd 100644
--- a/packages_rs/nextclade-web/src/io/fetchDatasets.ts
+++ b/packages_rs/nextclade-web/src/io/fetchDatasets.ts
@@ -1,64 +1,137 @@
+/* eslint-disable prefer-destructuring */
 import type { ParsedUrlQuery } from 'querystring'
+import { findSimilarStrings } from 'src/helpers/string'
+import { axiosHeadOrUndefined } from 'src/io/axiosFetch'
+import { isGithubUrlOrShortcut, parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'
 
 import { Dataset } from 'src/types'
 import {
   fetchDatasetsIndex,
   filterDatasets,
   findDataset,
+  getCompatibleMinimizerIndexVersion,
   getLatestCompatibleEnabledDatasets,
 } from 'src/io/fetchDatasetsIndex'
 import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
 import { useRecoilValue, useSetRecoilState } from 'recoil'
-import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom, datasetUpdatedAtom } from 'src/state/dataset.state'
+import {
+  datasetCurrentAtom,
+  datasetsAtom,
+  datasetServerUrlAtom,
+  datasetUpdatedAtom,
+  minimizerIndexVersionAtom,
+} from 'src/state/dataset.state'
 import { useQuery } from 'react-query'
 import { isNil } from 'lodash'
+import urljoin from 'url-join'
+import { URL_GITHUB_DATA_RAW } from 'src/constants'
 
 export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets: Dataset[]) {
   // Retrieve dataset-related URL params and try to find a dataset based on these params
-  const datasetName = getQueryParamMaybe(urlQuery, 'dataset-name')
+  const name = getQueryParamMaybe(urlQuery, 'dataset-name')
 
-  if (!datasetName) {
+  if (!name) {
     return undefined
   }
 
-  const datasetRef = getQueryParamMaybe(urlQuery, 'dataset-reference')
-  const datasetTag = getQueryParamMaybe(urlQuery, 'dataset-tag')
+  const tag = getQueryParamMaybe(urlQuery, 'dataset-tag')
+
+  const dataset = findDataset(datasets, name, tag)
 
-  const dataset = findDataset(datasets, datasetName, datasetRef, datasetTag)
   if (!dataset) {
+    // Suggest up to 10 known dataset paths which are the most similar to the requested name
+    const names = datasets.map((dataset) => dataset.path)
+    const suggestions = findSimilarStrings(names, name)
+      .slice(0, 10)
+      .map((s) => `'${s}'`)
+      .join(', ')
+    const tagMsg = tag ? ` and tag '${tag}'` : ''
+    // Omit the hint entirely when there is nothing to suggest
+    const suggestionsMsg = suggestions.length > 0 ? ` Did you mean one of: ${suggestions}` : ''
     throw new Error(
-      `Incorrect URL parameters: unable to find dataset with name='${datasetName}', ref='${datasetRef ?? ''}', tag='${
-        datasetTag ?? ''
-      }' `,
+      `Incorrect URL parameters: unable to find the dataset with name='${name}'${tagMsg}.${suggestionsMsg}`,
     )
   }
 
   return dataset
 }
 
-export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServerUrlDefault: string) {
-  const datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server') ?? datasetServerUrlDefault
+/**
+ * Builds the URL of the `data_output` directory on the branch named by the `BRANCH_NAME` env var under
+ * `URL_GITHUB_DATA_RAW`, and checks with a HEAD request that it contains an `index.json` file.
+ *
+ * @returns the URL, or `undefined` if `BRANCH_NAME` is not set or the HEAD request did not succeed
+ */
+export async function getGithubDatasetServerUrl(): Promise<string | undefined> {
+  const branchName = process.env.BRANCH_NAME
+  if (!branchName) {
+    return undefined
+  }
+
+  const serverUrl = urljoin(URL_GITHUB_DATA_RAW, branchName, 'data_output')
+  const indexJsonResponse = await axiosHeadOrUndefined(urljoin(serverUrl, 'index.json'))
+  return indexJsonResponse ? serverUrl : undefined
+}
+
+/**
+ * Converts a root-relative URL (one starting with `/`) to an absolute URL on the origin of the current page.
+ *
+ * The URL is returned unchanged if it does not start with `/`, or if there is no `window` object available.
+ */
+export function toAbsoluteUrl(url: string): string {
+  // Check only the first character: `url.slice(0)` is the entire string and would only ever match the URL `/`
+  if (typeof window !== 'undefined' && url.startsWith('/')) {
+    return urljoin(window.location.origin, url)
+  }
+  return url
+}
+
+/**
+ * Decides which dataset server URL to use, in the order of priority:
+ *  1. GitHub URL or GitHub URL shortcut given in the `dataset-server` URL parameter
+ *  2. datasets on the corresponding GitHub branch, if requested and available
+ *  3. any other value of the `dataset-server` URL parameter
+ *  4. `DATA_FULL_DOMAIN` env var, and `/` as the last resort
+ *
+ * @param urlQuery parsed URL query parameters of the current page
+ * @returns dataset server URL, converted to absolute if it happens to be a relative path
+ */
+export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery) {
+  // Get dataset URL from query URL params.
+  let datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server')
+
+  // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking
+  if (datasetServerUrl && isGithubUrlOrShortcut(datasetServerUrl)) {
+    const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(datasetServerUrl)
+    return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path)
+  }
+
+  // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from
+  // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the
+  // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it.
+  const datasetServerTryGithubBranch =
+    (isNil(datasetServerUrl) && process.env.DATA_TRY_GITHUB_BRANCH === '1') ||
+    (datasetServerUrl && ['gh', 'github'].includes(datasetServerUrl))
+  if (datasetServerTryGithubBranch) {
+    // At this point the value is either nil or one of the `gh`/`github` keywords, neither of which is a usable URL.
+    // So if the GitHub branch has no datasets, the value becomes `undefined` and the default below applies.
+    datasetServerUrl = await getGithubDatasetServerUrl()
+  }
+
+  // If none of the above, use hardcoded default URL (from `.env` file)
+  datasetServerUrl = datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/'
+
+  // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host)
+  return toAbsoluteUrl(datasetServerUrl)
+}
 
+export async function initializeDatasets(datasetServerUrl: string, urlQuery: ParsedUrlQuery = {}) {
   const datasetsIndexJson = await fetchDatasetsIndex(datasetServerUrl)
 
-  const { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly } =
-    getLatestCompatibleEnabledDatasets(datasetServerUrl, datasetsIndexJson)
+  const { datasets } = getLatestCompatibleEnabledDatasets(datasetServerUrl, datasetsIndexJson)
+
+  const minimizerIndexVersion = await getCompatibleMinimizerIndexVersion(datasetServerUrl, datasetsIndexJson)
 
   // Check if URL params specify dataset params and try to find the corresponding dataset
   const currentDataset = await getDatasetFromUrlParams(urlQuery, datasets)
 
-  return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset }
+  return { datasets, currentDataset, minimizerIndexVersion }
 }
 
 /** Refetch dataset index periodically and update the local copy of if */
 export function useUpdatedDatasetIndex() {
-  const setDatasetsState = useSetRecoilState(datasetsAtom)
   const datasetServerUrl = useRecoilValue(datasetServerUrlAtom)
+  const setDatasetsState = useSetRecoilState(datasetsAtom)
+  const setMinimizerIndexVersion = useSetRecoilState(minimizerIndexVersionAtom)
   useQuery(
     'refetchDatasetIndex',
     async () => {
-      const { currentDataset: _, ...datasetsState } = await initializeDatasets({}, datasetServerUrl)
-      setDatasetsState(datasetsState)
+      const { currentDataset: _, minimizerIndexVersion, ...datasets } = await initializeDatasets(datasetServerUrl)
+      setDatasetsState(datasets)
+      setMinimizerIndexVersion(minimizerIndexVersion)
     },
     {
       suspense: false,
@@ -84,14 +157,13 @@ export function useUpdatedDataset() {
   useQuery(
     'currentDatasetState',
     async () => {
-      const name = datasetCurrent?.attributes.name.value
-      const refAccession = datasetCurrent?.attributes.reference.value
-      const tag = datasetCurrent?.attributes.tag.value
-      if (!isNil(name) && !isNil(refAccession) && !isNil(tag)) {
-        const candidateDatasets = filterDatasets(datasets, name, refAccession)
+      const path = datasetCurrent?.path
+      const updatedAt = datasetCurrent?.version?.updatedAt
+      if (!isNil(updatedAt)) {
+        const candidateDatasets = filterDatasets(datasets, path)
         const updatedDataset = candidateDatasets.find((candidate) => {
-          const candidateTag = candidate.attributes.tag.value
-          return candidateTag > tag
+          const candidateTag = candidate.version?.updatedAt
+          return candidateTag && candidateTag > updatedAt
         })
         setDatasetUpdated(updatedDataset)
       }
diff --git a/packages_rs/nextclade-web/src/io/fetchDatasetsIndex.ts b/packages_rs/nextclade-web/src/io/fetchDatasetsIndex.ts
index e83210fac..c1690972f 100644
--- a/packages_rs/nextclade-web/src/io/fetchDatasetsIndex.ts
+++ b/packages_rs/nextclade-web/src/io/fetchDatasetsIndex.ts
@@ -1,82 +1,58 @@
-import { first, mapValues, sortBy } from 'lodash'
+import { head, mapValues, sortBy, sortedUniq } from 'lodash'
 import semver from 'semver'
-import { ErrorInternal } from 'src/helpers/ErrorInternal'
+import { takeFirstMaybe } from 'src/helpers/takeFirstMaybe'
 import urljoin from 'url-join'
 
-import { Dataset, DatasetsIndexV2Json } from 'src/types'
+import { Dataset, DatasetFiles, DatasetsIndexJson, DatasetsIndexV2Json, MinimizerIndexVersion } from 'src/types'
 import { axiosFetch } from 'src/io/axiosFetch'
 
-const DATA_INDEX_FILE = 'index_v2.json'
-const thisVersion = process.env.PACKAGE_VERSION ?? ''
+const MINIMIZER_INDEX_ALGO_VERSION = 'v1'
+const PACKAGE_VERSION = process.env.PACKAGE_VERSION ?? ''
 
 export function isEnabled(dataset: Dataset) {
   return dataset.enabled
 }
 
-export function isCompatible(dataset: Dataset) {
-  const { min, max } = dataset.compatibility.nextcladeWeb
-  return semver.gte(thisVersion, min ?? thisVersion) && semver.lte(thisVersion, max ?? thisVersion)
+export function isCompatible(dataset: Dataset): boolean {
+  const minVersion = dataset.version?.compatibility?.web ?? PACKAGE_VERSION
+  return semver.gte(PACKAGE_VERSION, minVersion)
 }
 
-export function isLatest(dataset: Dataset) {
-  return dataset.attributes.tag.isDefault
-}
-
-export function areAllAttributesDefault(dataset: Dataset) {
-  return Object.values(dataset.attributes).every((attr) => attr.isDefault)
+export function isLatest(dataset: Dataset): boolean {
+  // Dataset is latest if its version's `updatedAt` equals that of the first entry (`head`) in the array of all versions
+  return head(sortedUniq(dataset.versions ?? []).map((v) => v.updatedAt)) === dataset.version?.updatedAt
 }
 
 export function fileUrlsToAbsolute(datasetServerUrl: string, dataset: Dataset): Dataset {
-  const files = mapValues(dataset.files, (file) => urljoin(datasetServerUrl, file))
-  return { ...dataset, files }
-}
-
-export function getDefaultDataset(datasets: Dataset[]) {
-  const defaultDatasetCandidates = datasets.filter(areAllAttributesDefault)
-  if (defaultDatasetCandidates.length === 0) {
-    throw new ErrorInternal('Unable to find default dataset')
-  } else if (defaultDatasetCandidates.length > 1) {
-    throw new ErrorInternal('Multiple candidates found for default dataset')
+  const restFilesAbs = mapValues(dataset.files, (file) =>
+    file ? urljoin(datasetServerUrl, dataset.path, dataset.version?.tag ?? '', file) : undefined,
+  ) as DatasetFiles
+  const files = {
+    ...restFilesAbs,
   }
-  return datasets[0]
+  return { ...dataset, files }
 }
 
 export function getLatestCompatibleEnabledDatasets(datasetServerUrl: string, datasetsIndexJson: DatasetsIndexV2Json) {
-  const datasets = datasetsIndexJson.datasets
-    .filter(isEnabled)
-    .filter(isCompatible)
-    .filter(isLatest)
+  const datasets = datasetsIndexJson.collections
+    .flatMap((collection) => collection.datasets.filter(isEnabled).filter(isCompatible).filter(isLatest))
     .map((dataset) => fileUrlsToAbsolute(datasetServerUrl, dataset))
-
-  const defaultDataset = getDefaultDataset(datasets)
-
-  const { value, valueFriendly } = defaultDataset.attributes.name
-
-  return {
-    datasets,
-    defaultDataset,
-    defaultDatasetName: value,
-    defaultDatasetNameFriendly: valueFriendly ?? value,
-  }
+  return { datasets }
 }
 
 /** Find the latest dataset, optionally by name, ref and tag */
-export function findDataset(datasets: Dataset[], name?: string, refAccession?: string, tag?: string) {
-  const datasetsFound = filterDatasets(datasets, name, refAccession, tag)
-  return first(sortBy(datasetsFound, (dataset) => dataset.attributes.tag))
+export function findDataset(datasets: Dataset[], name?: string, tag?: string) {
+  const datasetsFound = filterDatasets(datasets, name, tag)
+  return head(sortBy(datasetsFound, (dataset) => dataset.version?.tag ?? ''))
 }
 
 /** Find the datasets given name, ref and tag */
-export function filterDatasets(datasets: Dataset[], name?: string, refAccession?: string, tag?: string) {
+export function filterDatasets(datasets: Dataset[], name?: string, tag?: string) {
   return datasets.filter((dataset) => {
-    let isMatch = dataset.attributes.name.value === name
-
-    if (refAccession) {
-      isMatch = isMatch && dataset.attributes.reference.value === refAccession
-    }
+    let isMatch = dataset.path === name
 
     if (tag) {
-      isMatch = isMatch && dataset.attributes.tag.value === tag
+      isMatch = isMatch && dataset.version?.tag === tag
     }
 
     return isMatch
@@ -84,5 +60,23 @@ export function filterDatasets(datasets: Dataset[], name?: string, refAccession?
 }
 
 export async function fetchDatasetsIndex(datasetServerUrl: string) {
-  return axiosFetch<DatasetsIndexV2Json>(urljoin(datasetServerUrl, DATA_INDEX_FILE))
+  return axiosFetch<DatasetsIndexJson>(urljoin(datasetServerUrl, 'index.json'))
+}
+
+export async function getCompatibleMinimizerIndexVersion(
+  datasetServerUrl: string,
+  datasetsIndexJson: DatasetsIndexV2Json,
+): Promise<MinimizerIndexVersion | undefined> {
+  let candidates = datasetsIndexJson.minimizerIndex?.filter(
+    (minimizerIndexVer) => MINIMIZER_INDEX_ALGO_VERSION >= minimizerIndexVer.version,
+  )
+  candidates = sortBy(candidates, (candidate) => candidate.version).reverse()
+  const index = takeFirstMaybe(candidates)
+  if (index) {
+    return {
+      ...index,
+      path: urljoin(datasetServerUrl, index.path),
+    }
+  }
+  return undefined
 }
diff --git a/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromGithub.ts b/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromGithub.ts
index f648a3a0a..050ad9b45 100644
--- a/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromGithub.ts
+++ b/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromGithub.ts
@@ -1,4 +1,3 @@
-/* eslint-disable prefer-template */
 import { isNil } from 'lodash'
 import pMemoize from 'p-memoize'
 
@@ -95,19 +94,13 @@ export function isGithubUrlOrShortcut(url: string): boolean {
   return !isNil(/^(github:|gh:|https?:\/\/github.com).*/.exec(url))
 }
 
-const GITHUB_URL_EXAMPLE =
-  'https://github.com/nextstrain/nextclade_data/tree/6ab9560b86e3384792235fa72d1c3eaf30e71213/data/datasets/flu_yam_ha/references/JN993010/versions/2022-07-27T12:00:00Z/files/'
-
-const GITHUB_URL_ERROR_HINTS = ` Check the correctness of the URL. If you don't intend to use custom dataset, remove the parameter from the address or restart the application. An example of a correct URL: '${GITHUB_URL_EXAMPLE}'`
+const GITHUB_URL_ERROR_HINTS = ` Check the correctness of the URL. If it's a full GitHub URL, please try to navigate to it - you should see a GitHub repo branch with your files listed. If it's a GitHub URL shortcut, please double check the syntax. See documentation for the correct syntax and examples. If you don't intend to use custom datasets, remove the parameter from the address or restart the application.`
 
 export class ErrorDatasetGithubUrlPatternInvalid extends Error {
   public readonly datasetGithubUrl: string
 
   constructor(datasetGithubUrl: string) {
-    super(
-      `Dataset GitHub URL (provided using 'dataset-url' URL parameter) is invalid: '${datasetGithubUrl}'.` +
-        GITHUB_URL_ERROR_HINTS,
-    )
+    super(`Dataset GitHub URL is invalid: '${datasetGithubUrl}'.${GITHUB_URL_ERROR_HINTS}`)
     this.datasetGithubUrl = datasetGithubUrl
   }
 }
@@ -122,9 +115,7 @@ export class ErrorDatasetGithubUrlComponentsInvalid extends Error {
       .join(',')
 
     super(
-      `Dataset GitHub URL (provided using 'dataset-url' URL parameter) is invalid: '${datasetGithubUrl}'.` +
-        ` Detected the following components ${componentsListStr}.` +
-        GITHUB_URL_ERROR_HINTS,
+      `Dataset GitHub URL is invalid: '${datasetGithubUrl}'. Detected the following components ${componentsListStr}.${GITHUB_URL_ERROR_HINTS}`,
     )
     this.datasetGithubUrl = datasetGithubUrl
     this.parsedRepoUrlComponents = parsedRepoUrlComponents
diff --git a/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromUrl.ts b/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromUrl.ts
index b9e4a7fc2..583af2baf 100644
--- a/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromUrl.ts
+++ b/packages_rs/nextclade-web/src/io/fetchSingleDatasetFromUrl.ts
@@ -1,9 +1,9 @@
 import urljoin from 'url-join'
 import { concurrent } from 'fasy'
 
-import { Dataset, DatasetTag } from 'src/types'
+import { Dataset, VirusProperties } from 'src/types'
 import { removeTrailingSlash } from 'src/io/url'
-import { axiosFetchOrUndefined, axiosHead } from 'src/io/axiosFetch'
+import { axiosFetch, axiosHead } from 'src/io/axiosFetch'
 import { sanitizeError } from 'src/helpers/sanitizeError'
 
 export async function fetchSingleDatasetFromUrl(
@@ -12,53 +12,14 @@ export async function fetchSingleDatasetFromUrl(
 ) {
   const datasetRootUrl = removeTrailingSlash(datasetRootUrl_)
 
-  const tag = await axiosFetchOrUndefined<DatasetTag>(urljoin(datasetRootUrl, 'tag.json'))
-
+  const pathogen = await axiosFetch<VirusProperties>(urljoin(datasetRootUrl, 'pathogen.json'))
   const currentDataset: Dataset = {
-    enabled: true,
-    attributes: {
-      name: {
-        value: tag?.attributes?.name?.value ?? meta?.datasetGithubRepo ?? 'untitled-dataset',
-        valueFriendly: tag?.attributes?.name?.valueFriendly ?? meta?.datasetGithubRepo ?? 'Untitled dataset',
-        isDefault: true,
-      },
-      reference: {
-        value: tag?.attributes?.reference?.value ?? 'unknown',
-        valueFriendly: tag?.attributes?.reference?.valueFriendly ?? 'unknown',
-        isDefault: true,
-      },
-      tag: {
-        value: tag?.attributes?.tag?.value ?? 'unknown',
-        valueFriendly: tag?.attributes?.tag?.valueFriendly ?? 'unknown',
-        isDefault: true,
-      },
-      url: {
-        value: tag?.attributes?.url?.value ?? meta?.datasetGithubRepo ?? meta?.datasetOriginalUrl ?? datasetRootUrl,
-        valueFriendly: tag?.attributes?.url?.valueFriendly ?? meta?.datasetGithubRepo,
-        isDefault: true,
-      },
-    },
-    comment: tag?.comment ?? '',
-    compatibility: tag?.compatibility ?? {
-      nextcladeCli: {
-        min: '1.10.0',
-      },
-      nextcladeWeb: {
-        min: '1.13.0',
-      },
-    },
-    files: {
-      'genemap.gff': urljoin(datasetRootUrl, 'genemap.gff'),
-      'primers.csv': urljoin(datasetRootUrl, 'primers.csv'),
-      'qc.json': urljoin(datasetRootUrl, 'qc.json'),
-      'reference.fasta': urljoin(datasetRootUrl, 'reference.fasta'),
-      'sequences.fasta': urljoin(datasetRootUrl, 'sequences.fasta'),
-      'tag.json': urljoin(datasetRootUrl, 'tag.json'),
-      'tree.json': urljoin(datasetRootUrl, 'tree.json'),
-      'virus_properties.json': urljoin(datasetRootUrl, 'virus_properties.json'),
+    path: datasetRootUrl,
+    capabilities: {
+      primers: false,
+      qc: [],
     },
-    params: tag?.params ?? { defaultGene: undefined, geneOrderPreference: undefined },
-    zipBundle: tag?.zipBundle ?? urljoin(datasetRootUrl, 'dataset.zip'),
+    ...pathogen,
   }
 
   const datasets = [currentDataset]
diff --git a/packages_rs/nextclade-web/src/lib.rs b/packages_rs/nextclade-web/src/lib.rs
index f539b360f..c2c9a50cb 100644
--- a/packages_rs/nextclade-web/src/lib.rs
+++ b/packages_rs/nextclade-web/src/lib.rs
@@ -1 +1,9 @@
+use wasm_bindgen::prelude::wasm_bindgen;
+
+#[wasm_bindgen(start)]
+pub fn main() {
+  wasm_logger::init(wasm_logger::Config::default());
+  console_error_panic_hook::set_once();
+}
+
 mod wasm;
diff --git a/packages_rs/nextclade-web/src/pages/_app.tsx b/packages_rs/nextclade-web/src/pages/_app.tsx
index 1b4cc48a8..d92728277 100644
--- a/packages_rs/nextclade-web/src/pages/_app.tsx
+++ b/packages_rs/nextclade-web/src/pages/_app.tsx
@@ -4,7 +4,7 @@ import 'css.escape'
 
 import { isEmpty, isNil } from 'lodash'
 import React, { useEffect, Suspense, useMemo } from 'react'
-import { RecoilRoot, useRecoilCallback, useRecoilState, useRecoilValue } from 'recoil'
+import { RecoilEnv, RecoilRoot, useRecoilCallback, useRecoilState, useRecoilValue } from 'recoil'
 import { AppProps } from 'next/app'
 import { useRouter } from 'next/router'
 import dynamic from 'next/dynamic'
@@ -16,7 +16,6 @@ import { createInputFastasFromUrlParam, createInputFromUrlParamMaybe } from 'src
 import { globalErrorAtom } from 'src/state/error.state'
 import {
   geneMapInputAtom,
-  qcConfigInputAtom,
   qrySeqInputsStorageAtom,
   refSeqInputAtom,
   refTreeInputAtom,
@@ -40,7 +39,7 @@ import { ReactQueryDevtools } from 'react-query/devtools'
 
 import { DOMAIN_STRIPPED } from 'src/constants'
 import { parseUrl } from 'src/helpers/parseUrl'
-import { initializeDatasets } from 'src/io/fetchDatasets'
+import { getDatasetServerUrl, initializeDatasets } from 'src/io/fetchDatasets'
 import { fetchSingleDataset } from 'src/io/fetchSingleDataset'
 import { ErrorPopup } from 'src/components/Error/ErrorPopup'
 import Loading from 'src/components/Loading/Loading'
@@ -49,12 +48,19 @@ import { SEO } from 'src/components/Common/SEO'
 import { Plausible } from 'src/components/Common/Plausible'
 import i18n, { changeLocale, getLocaleWithKey } from 'src/i18n/i18n'
 import { theme } from 'src/theme'
-import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom } from 'src/state/dataset.state'
+import {
+  datasetCurrentAtom,
+  datasetsAtom,
+  datasetServerUrlAtom,
+  minimizerIndexVersionAtom,
+} from 'src/state/dataset.state'
 import { ErrorBoundary } from 'src/components/Error/ErrorBoundary'
 import { PreviewWarning } from 'src/components/Common/PreviewWarning'
 
 import 'src/styles/global.scss'
 
+RecoilEnv.RECOIL_DUPLICATE_ATOM_KEY_CHECKING_ENABLED = false
+
 /**
  * Dummy component that allows to set recoil state asynchronously. Needed because RecoilRoot's initializeState
  * currently only handles synchronous update and any calls to set() from promises have no effect
@@ -95,11 +101,15 @@ export function RecoilStateInitializer() {
         const datasetInfo = await fetchSingleDataset(urlQuery)
 
         if (!isNil(datasetInfo)) {
-          return datasetInfo
+          const { datasets, currentDataset } = datasetInfo
+          return { datasets, currentDataset, minimizerIndexVersion: undefined }
         }
 
-        const datasetServerUrlDefault = await getPromise(datasetServerUrlAtom)
-        return initializeDatasets(urlQuery, datasetServerUrlDefault)
+        const datasetServerUrl = await getDatasetServerUrl(urlQuery)
+        set(datasetServerUrlAtom, datasetServerUrl)
+
+        const { datasets, currentDataset, minimizerIndexVersion } = await initializeDatasets(datasetServerUrl, urlQuery)
+        return { datasets, currentDataset, minimizerIndexVersion }
       })
       .catch((error) => {
         // Dataset error is fatal and we want error to be handled in the ErrorBoundary
@@ -107,17 +117,12 @@ export function RecoilStateInitializer() {
         set(globalErrorAtom, sanitizeError(error))
         throw error
       })
-      .then(async ({ datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset }) => {
-        set(datasetsAtom, {
-          datasets,
-          defaultDataset,
-          defaultDatasetName,
-          defaultDatasetNameFriendly,
-        })
-
+      .then(async ({ datasets, currentDataset, minimizerIndexVersion }) => {
+        set(datasetsAtom, { datasets })
         const previousDataset = await getPromise(datasetCurrentAtom)
         const dataset = currentDataset ?? previousDataset
         set(datasetCurrentAtom, dataset)
+        set(minimizerIndexVersionAtom, minimizerIndexVersion)
         return dataset
       })
       .then((dataset) => {
@@ -127,11 +132,10 @@ export function RecoilStateInitializer() {
           set(qrySeqInputsStorageAtom, inputFastas)
         }
 
-        set(refSeqInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-root-seq'))
-        set(geneMapInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-gene-map'))
+        set(refSeqInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-ref'))
+        set(geneMapInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-annotation'))
         set(refTreeInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-tree'))
-        set(qcConfigInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-qc-config'))
-        set(virusPropertiesInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-virus-properties'))
+        set(virusPropertiesInputAtom, createInputFromUrlParamMaybe(urlQuery, 'input-pathogen-json'))
 
         if (!isEmpty(inputFastas)) {
           run()
diff --git a/packages_rs/nextclade-web/src/pages/_error.tsx b/packages_rs/nextclade-web/src/pages/_error.tsx
index 423c37e6c..4c50d1e22 100644
--- a/packages_rs/nextclade-web/src/pages/_error.tsx
+++ b/packages_rs/nextclade-web/src/pages/_error.tsx
@@ -5,9 +5,8 @@ import get from 'lodash/get'
 
 import { ErrorContent } from 'src/components/Error/ErrorContent'
 import { RestartButton } from 'src/components/Error/ErrorStyles'
-import { LayoutResults } from 'src/components/Layout/LayoutResults'
+import { Layout } from 'src/components/Layout/Layout'
 import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
-import { MainSectionTitle } from 'src/components/Main/MainSectionTitle'
 import styled from 'styled-components'
 
 export const Container = styled(ContainerBase)`
@@ -70,10 +69,8 @@ function ErrorPage({ statusCode, title, error }: ErrorPageProps) {
   }, [error])
 
   return (
-    <LayoutResults>
+    <Layout>
       <MainContent>
-        <MainSectionTitle />
-
         <Row noGutters>
           <Col className="text-center text-danger">
             <h2>{titleText}</h2>
@@ -88,7 +85,7 @@ function ErrorPage({ statusCode, title, error }: ErrorPageProps) {
           </Col>
         </Row>
       </MainContent>
-    </LayoutResults>
+    </Layout>
   )
 }
 
diff --git a/packages_rs/nextclade-web/src/state/autodetect.state.ts b/packages_rs/nextclade-web/src/state/autodetect.state.ts
new file mode 100644
index 000000000..5697f4875
--- /dev/null
+++ b/packages_rs/nextclade-web/src/state/autodetect.state.ts
@@ -0,0 +1,133 @@
+/* eslint-disable no-loops/no-loops */
+import unique from 'fork-ts-checker-webpack-plugin/lib/utils/array/unique'
+import { isEmpty, isNil } from 'lodash'
+import { atom, atomFamily, DefaultValue, selector, selectorFamily } from 'recoil'
+import type { MinimizerIndexJson, MinimizerSearchRecord } from 'src/types'
+import { isDefaultValue } from 'src/state/utils/isDefaultValue'
+
+export const minimizerIndexAtom = atom<MinimizerIndexJson>({
+  key: 'minimizerIndexAtom',
+})
+
+const autodetectResultInternalAtom = atomFamily<MinimizerSearchRecord, number>({
+  key: 'autodetectResultInternalAtom',
+})
+
+export const autodetectResultIndicesAtom = atom<number[]>({
+  key: 'autodetectResultIndicesAtom',
+  default: [],
+})
+
+export const autodetectResultByIndexAtom = selectorFamily<MinimizerSearchRecord, number>({
+  key: 'autodetectResultByIndexAtom',
+
+  get:
+    (index: number) =>
+    ({ get }): MinimizerSearchRecord => {
+      return get(autodetectResultInternalAtom(index))
+    },
+
+  set:
+    (index) =>
+    ({ set, reset }, result: MinimizerSearchRecord | DefaultValue) => {
+      if (isDefaultValue(result)) {
+        reset(autodetectResultInternalAtom(index))
+        reset(autodetectResultIndicesAtom)
+      } else {
+        set(autodetectResultInternalAtom(index), result)
+
+        // Append this record's `fastaRecord.index` to the list of indices (no de-duplication is done here)
+        set(autodetectResultIndicesAtom, (prev) => {
+          if (result) {
+            return [...prev, result.fastaRecord.index]
+          }
+          return prev
+        })
+      }
+    },
+})
+
+// Dataset ID to use when no dataset has been autodetected
+export const DATASET_ID_UNDETECTED = 'undetected'
+
+export function groupByDatasets(records: MinimizerSearchRecord[]): Record<string, MinimizerSearchRecord[]> {
+  const names = unique(records.flatMap((record) => record.result.datasets.map((dataset) => dataset.name)))
+  let byDataset = {}
+  for (const name of names) {
+    const selectedRecords = records.filter((record) => record.result.datasets.some((dataset) => dataset.name === name))
+    byDataset = { ...byDataset, [name]: selectedRecords }
+  }
+  return byDataset
+}
+
+// Select autodetect results by dataset name; `DATASET_ID_UNDETECTED` selects the records with no detected datasets
+export const autodetectResultsByDatasetAtom = selectorFamily<MinimizerSearchRecord[] | undefined, string>({
+  key: 'autodetectResultByDatasetAtom',
+
+  get:
+    (datasetId: string) =>
+    ({ get }): MinimizerSearchRecord[] | undefined => {
+      const records = get(autodetectResultsAtom)
+      if (isNil(records)) {
+        return undefined
+      }
+
+      if (datasetId === DATASET_ID_UNDETECTED) {
+        return records.filter((record) => isEmpty(record.result.datasets))
+      }
+
+      return records.filter((record) => record.result.datasets.some((dataset) => dataset.name === datasetId))
+    },
+})
+
+export const autodetectResultsAtom = selector<MinimizerSearchRecord[] | undefined>({
+  key: 'autodetectResultsAtom',
+
+  get({ get }): MinimizerSearchRecord[] | undefined {
+    const indices = get(autodetectResultIndicesAtom)
+    if (indices.length === 0) {
+      return undefined
+    }
+    return indices.map((index) => get(autodetectResultByIndexAtom(index)))
+  },
+
+  set({ get, set, reset }, results: MinimizerSearchRecord[] | DefaultValue | undefined) {
+    const seqIndices = get(autodetectResultIndicesAtom)
+
+    // Reset the per-index result for every index currently in the list
+    seqIndices.forEach((index) => {
+      reset(autodetectResultByIndexAtom(index))
+    })
+
+    // If the new value is neither a reset (`DefaultValue`) nor nil, store each new item under its `fastaRecord.index`
+    if (!isDefaultValue(results) && !isNil(results)) {
+      results.forEach((result) => set(autodetectResultByIndexAtom(result.fastaRecord.index), result))
+    }
+  },
+})
+
+export const numberAutodetectResultsAtom = selector<number>({
+  key: 'numberAutodetectResultsAtom',
+  get({ get }) {
+    return (get(autodetectResultsAtom) ?? []).length
+  },
+})
+
+export const hasAutodetectResultsAtom = selector<boolean>({
+  key: 'hasAutodetectResultsAtom',
+  get({ get }) {
+    return get(numberAutodetectResultsAtom) > 0
+  },
+})
+
+export enum AutodetectRunState {
+  Idle = 'Idle',
+  Started = 'Started',
+  Failed = 'Failed',
+  Done = 'Done',
+}
+
+export const autodetectRunStateAtom = atom<AutodetectRunState>({
+  key: 'autodetectRunStateAtom',
+  default: AutodetectRunState.Idle,
+})
diff --git a/packages_rs/nextclade-web/src/state/dataset.state.ts b/packages_rs/nextclade-web/src/state/dataset.state.ts
index 276464af3..880acf271 100644
--- a/packages_rs/nextclade-web/src/state/dataset.state.ts
+++ b/packages_rs/nextclade-web/src/state/dataset.state.ts
@@ -1,36 +1,22 @@
 import { isNil } from 'lodash'
 import { atom, DefaultValue, selector } from 'recoil'
-import urljoin from 'url-join'
 
-import type { Dataset } from 'src/types'
-import { GENE_OPTION_NUC_SEQUENCE } from 'src/constants'
-import { inputResetAtom } from 'src/state/inputs.state'
+import type { Dataset, MinimizerIndexVersion } from 'src/types'
+// import { GENE_OPTION_NUC_SEQUENCE } from 'src/constants'
 import { persistAtom } from 'src/state/persist/localStorage'
-import { viewedGeneAtom } from 'src/state/seqViewSettings.state'
+// import { viewedGeneAtom } from 'src/state/seqViewSettings.state'
 import { isDefaultValue } from 'src/state/utils/isDefaultValue'
 import { areDatasetsEqual } from 'src/types'
 
-export function getDefaultDatasetServer(): string {
-  let datasetServerUrl = process.env.DATA_FULL_DOMAIN ?? '/'
-  // Add HTTP Origin if datasetServerUrl is a relative path (start with '/')
-  if (typeof window !== 'undefined' && datasetServerUrl.slice(0) === '/') {
-    datasetServerUrl = urljoin(window.location.origin, datasetServerUrl)
-  }
-  return datasetServerUrl
+export interface Datasets {
+  datasets: Dataset[]
 }
 
 export const datasetServerUrlAtom = atom<string>({
-  key: 'datasetServerUrl',
-  default: getDefaultDatasetServer(),
+  key: 'datasetServerUrlAtom',
+  default: '/',
 })
 
-export interface Datasets {
-  datasets: Dataset[]
-  defaultDataset: Dataset
-  defaultDatasetName: string
-  defaultDatasetNameFriendly: string
-}
-
 export const datasetsAtom = atom<Datasets>({
   key: 'datasets',
 })
@@ -52,8 +38,8 @@ export const datasetCurrentAtom = selector<Dataset | undefined>({
       reset(datasetCurrentStorageAtom)
     } else if (!areDatasetsEqual(datasetCurrent, dataset)) {
       set(datasetCurrentStorageAtom, dataset)
-      set(viewedGeneAtom, dataset.params?.defaultGene ?? GENE_OPTION_NUC_SEQUENCE)
-      reset(inputResetAtom)
+      // FIXME
+      // set(viewedGeneAtom, dataset?.defaultGene ?? GENE_OPTION_NUC_SEQUENCE)
     }
   },
 })
@@ -65,7 +51,15 @@ export const datasetUpdatedAtom = atom<Dataset | undefined>({
 
 export const geneOrderPreferenceAtom = selector({
   key: 'geneOrderPreference',
+  // eslint-disable-next-line unused-imports/no-unused-vars
   get({ get }) {
-    return get(datasetCurrentAtom)?.params?.geneOrderPreference ?? []
+    // FIXME
+    // return get(datasetCurrentAtom)?.params?.geneOrderPreference ?? []
+    return []
   },
 })
+
+export const minimizerIndexVersionAtom = atom<MinimizerIndexVersion | undefined>({
+  key: 'minimizerIndexVersionAtom',
+  default: undefined,
+})
diff --git a/packages_rs/nextclade-web/src/state/inputs.state.ts b/packages_rs/nextclade-web/src/state/inputs.state.ts
index 97e27d92e..2ddff2b13 100644
--- a/packages_rs/nextclade-web/src/state/inputs.state.ts
+++ b/packages_rs/nextclade-web/src/state/inputs.state.ts
@@ -1,6 +1,7 @@
 import { isEmpty } from 'lodash'
 import { useCallback } from 'react'
 import { atom, selector, useRecoilState, useResetRecoilState } from 'recoil'
+import { autodetectResultsAtom } from 'src/state/autodetect.state'
 import { AlgorithmInput } from 'src/types'
 import { notUndefinedOrNull } from 'src/helpers/notUndefined'
 
@@ -11,7 +12,8 @@ export const qrySeqInputsStorageAtom = atom<AlgorithmInput[]>({
 
 export function useQuerySeqInputs() {
   const [qryInputs, setQryInputs] = useRecoilState(qrySeqInputsStorageAtom)
-  const clearQryInputs = useResetRecoilState(qrySeqInputsStorageAtom)
+  const resetSeqInputsStorage = useResetRecoilState(qrySeqInputsStorageAtom)
+  const resetAutodetectResults = useResetRecoilState(autodetectResultsAtom)
 
   const addQryInputs = useCallback(
     (newInputs: AlgorithmInput[]) => {
@@ -27,6 +29,11 @@ export function useQuerySeqInputs() {
     [setQryInputs],
   )
 
+  const clearQryInputs = useCallback(() => {
+    resetAutodetectResults()
+    resetSeqInputsStorage()
+  }, [resetAutodetectResults, resetSeqInputsStorage])
+
   return { qryInputs, addQryInputs, removeQryInput, clearQryInputs }
 }
 
@@ -45,11 +52,6 @@ export const refTreeInputAtom = atom<AlgorithmInput | undefined>({
   default: undefined,
 })
 
-export const qcConfigInputAtom = atom<AlgorithmInput | undefined>({
-  key: 'qcConfigInput',
-  default: undefined,
-})
-
 export const virusPropertiesInputAtom = atom<AlgorithmInput | undefined>({
   key: 'virusPropertiesInput',
   default: undefined,
@@ -66,13 +68,9 @@ export const hasRequiredInputsAtom = selector({
 export const inputCustomizationCounterAtom = selector<number>({
   key: 'inputCustomizationCounterAtom',
   get: ({ get }) => {
-    return [
-      get(refSeqInputAtom),
-      get(geneMapInputAtom),
-      get(refTreeInputAtom),
-      get(qcConfigInputAtom),
-      get(virusPropertiesInputAtom),
-    ].filter(notUndefinedOrNull).length
+    return [get(refSeqInputAtom), get(geneMapInputAtom), get(refTreeInputAtom), get(virusPropertiesInputAtom)].filter(
+      notUndefinedOrNull,
+    ).length
   },
 })
 
@@ -85,7 +83,6 @@ export const inputResetAtom = selector<undefined>({
     reset(refSeqInputAtom)
     reset(geneMapInputAtom)
     reset(refTreeInputAtom)
-    reset(qcConfigInputAtom)
     reset(virusPropertiesInputAtom)
   },
 })
diff --git a/packages_rs/nextclade-web/src/state/settings.state.ts b/packages_rs/nextclade-web/src/state/settings.state.ts
index d84aed43e..91f17aa36 100644
--- a/packages_rs/nextclade-web/src/state/settings.state.ts
+++ b/packages_rs/nextclade-web/src/state/settings.state.ts
@@ -32,7 +32,7 @@ export const isNewRunPopupShownAtom = atom<boolean>({
 })
 
 export const isResultsFilterPanelCollapsedAtom = atom<boolean>({
-  key: 'isResultsfilterPanelCollapsed',
+  key: 'isResultsFilterPanelCollapsedAtom',
   default: true,
 })
 
@@ -42,6 +42,12 @@ export const shouldRunAutomaticallyAtom = atom<boolean>({
   effects: [persistAtom],
 })
 
+export const shouldSuggestDatasetsAtom = atom<boolean>({
+  key: 'shouldSuggestDatasetsAtom',
+  default: true,
+  effects: [persistAtom],
+})
+
 export const changelogIsShownAtom = atom<boolean>({
   key: 'changelogIsShown',
   default: false,
diff --git a/packages_rs/nextclade-web/src/styles/components/LanguageSwitcher.scss b/packages_rs/nextclade-web/src/styles/components/LanguageSwitcher.scss
deleted file mode 100644
index bd2f66132..000000000
--- a/packages_rs/nextclade-web/src/styles/components/LanguageSwitcher.scss
+++ /dev/null
@@ -1,19 +0,0 @@
-@import '../variables';
-
-.language-switcher {
-  margin-top: 3px;
-
-  .language-switcher-menu.dropdown-menu {
-    background-color: $body-bg;
-    box-shadow: 1px 1px 2px 2px rgba($gray-600, 0.25);
-  }
-
-  .language-switcher-flag {
-    width: 20px;
-    height: 20px;
-    margin-bottom: 5px;
-    background-size: cover;
-    border-radius: 2px;
-    box-shadow: 1px 1px 2px 2px rgba($gray-600, 0.25);
-  }
-}
diff --git a/packages_rs/nextclade-web/src/styles/components/NavigationBar.scss b/packages_rs/nextclade-web/src/styles/components/NavigationBar.scss
deleted file mode 100644
index 4f1b75a71..000000000
--- a/packages_rs/nextclade-web/src/styles/components/NavigationBar.scss
+++ /dev/null
@@ -1,17 +0,0 @@
-@import '../variables';
-
-.navbar {
-  box-shadow: none;
-  border: none;
-  margin: 0;
-  background-color: $body-bg;
-  height: 50px;
-}
-
-.navbar-scroll {
-  overflow-x: auto;
-  overflow-y: hidden;
-  white-space: nowrap;
-  -webkit-overflow-scrolling: touch;
-}
-
diff --git a/packages_rs/nextclade-web/src/styles/global.scss b/packages_rs/nextclade-web/src/styles/global.scss
index 9ca9e0a3d..a8f6a4289 100644
--- a/packages_rs/nextclade-web/src/styles/global.scss
+++ b/packages_rs/nextclade-web/src/styles/global.scss
@@ -9,29 +9,23 @@
 
 @import './auspice';
 
-@import './components/LanguageSwitcher';
-@import './components/NavigationBar';
 @import './components/Results';
 
-html,
-body {
+html, body, #__next {
+  overflow: hidden;
   height: 100%;
-}
-
-html {
-  overflow-y: auto;
+  width: 100%;
+  padding: 0;
+  margin: 0;
 }
 
 body {
   background-color: $body-bg;
 }
 
-#__next {
-  height: 100%;
-}
-
 .progress {
   @include box-shadow(none);
+
   .progress-bar {
     font-size: 8px;
     line-height: 8px;
@@ -88,6 +82,7 @@ body {
 .hide-native-scrollbar {
   scrollbar-width: none;
   -ms-overflow-style: none;
+
   &::-webkit-scrollbar {
     display: none;
   }
diff --git a/packages_rs/nextclade-web/src/theme.ts b/packages_rs/nextclade-web/src/theme.ts
index 22ac50a4a..4d13bc8a0 100644
--- a/packages_rs/nextclade-web/src/theme.ts
+++ b/packages_rs/nextclade-web/src/theme.ts
@@ -150,6 +150,13 @@ export const uploadZone = {
   },
 }
 
+export const table = {
+  rowBg: {
+    even: '#fcfcfc',
+    odd: '#ededed',
+  },
+}
+
 export const theme = {
   bodyColor: basicColors.gray700,
   bodyBg: basicColors.white,
@@ -163,6 +170,7 @@ export const theme = {
   shadows,
   filePicker,
   uploadZone,
+  table,
 
   seqView: {
     markers: {
diff --git a/packages_rs/nextclade-web/src/types.ts b/packages_rs/nextclade-web/src/types.ts
index e7ed7b776..03fc0a055 100644
--- a/packages_rs/nextclade-web/src/types.ts
+++ b/packages_rs/nextclade-web/src/types.ts
@@ -1,11 +1,9 @@
-import { isEqual, isNil, range, sumBy } from 'lodash'
+import { isNil, range, sumBy } from 'lodash'
 import type {
   Aa,
   Cds,
   CdsSegment,
   Dataset,
-  DatasetFileUrls,
-  DatasetTagJson,
   DatasetsIndexJson,
   FastaRecord,
   InsertionFor_Nuc, // eslint-disable-line camelcase
@@ -40,8 +38,6 @@ export type AnalysisResult = NextcladeOutputs
 export type AnalysisError = NextcladeErrorOutputs
 export type FastaRecordId = StrictOmit<FastaRecord, 'seq'>
 export type DatasetsIndexV2Json = DatasetsIndexJson
-export type DatasetTag = DatasetTagJson
-export type DatasetFiles = DatasetFileUrls
 
 export interface PrivateMutationsInternal {
   reversionSubstitutions: NucSub[]
@@ -131,6 +127,8 @@ export enum AlgorithmInputType {
 }
 
 export interface AlgorithmInput {
+  uid: string
+  path: string
   type: AlgorithmInputType
   name: string
   description: string
@@ -139,5 +137,5 @@ export interface AlgorithmInput {
 }
 
 export function areDatasetsEqual(left?: Dataset, right?: Dataset): boolean {
-  return !isNil(left) && !isNil(right) && isEqual(left.attributes, right.attributes)
+  return !isNil(left?.path) && !isNil(right?.path) && left?.path === right?.path
 }
diff --git a/packages_rs/nextclade-web/src/wasm/jserr.rs b/packages_rs/nextclade-web/src/wasm/jserr.rs
new file mode 100644
index 000000000..0a7b13b8d
--- /dev/null
+++ b/packages_rs/nextclade-web/src/wasm/jserr.rs
@@ -0,0 +1,13 @@
+use eyre::Report;
+use nextclade::utils::error::report_to_string;
+use wasm_bindgen::{JsError, JsValue};
+
+/// Converts Result's Err variant from eyre::Report to wasm_bindgen::JsError
+pub fn jserr<T>(result: Result<T, Report>) -> Result<T, JsError> {
+  result.map_err(|report| JsError::new(&report_to_string(&report)))
+}
+
+/// Converts Result's Err variant from wasm_bindgen::JsValue to wasm_bindgen::JsError
+pub fn jserr2<T>(result: Result<T, JsValue>) -> Result<T, JsError> {
+  result.map_err(|err_val| JsError::new(&format!("{err_val:#?}")))
+}
diff --git a/packages_rs/nextclade-web/src/wasm/main.rs b/packages_rs/nextclade-web/src/wasm/main.rs
index 284a06ed2..cac8bb953 100644
--- a/packages_rs/nextclade-web/src/wasm/main.rs
+++ b/packages_rs/nextclade-web/src/wasm/main.rs
@@ -1,13 +1,13 @@
+use crate::wasm::jserr::jserr;
 use eyre::{Report, WrapErr};
 use itertools::Itertools;
 use nextclade::analyze::virus_properties::{AaMotifsDesc, PhenotypeAttrDesc};
-use nextclade::io::errors_csv::{errors_to_csv_string, ErrorsFromWeb};
 use nextclade::io::fasta::{read_one_fasta_str, FastaReader, FastaRecord};
-use nextclade::io::insertions_csv::insertions_to_csv_string;
 use nextclade::io::json::{json_parse, json_stringify, JsonPretty};
 use nextclade::io::nextclade_csv::{results_to_csv_string, CsvColumnConfig};
 use nextclade::io::results_json::{results_to_json_string, results_to_ndjson_string};
-use nextclade::run::nextclade_wasm::{Nextclade, NextcladeParams, NextcladeParamsRaw};
+use nextclade::run::nextclade_wasm::{Nextclade, NextcladeParams, NextcladeParamsRaw, NextcladeResult};
+use nextclade::run::params::NextcladeInputParamsOptional;
 use nextclade::tree::tree::CladeNodeAttrKeyDesc;
 use nextclade::types::outputs::{NextcladeErrorOutputs, NextcladeOutputs};
 use nextclade::utils::error::report_to_string;
@@ -15,11 +15,6 @@ use std::io::Read;
 use std::str::FromStr;
 use wasm_bindgen::prelude::*;
 
-/// Converts Result's Err variant from eyre::Report to wasm_bindgen::JsError
-fn jserr<T>(result: Result<T, Report>) -> Result<T, JsError> {
-  result.map_err(|report| JsError::new(&report_to_string(&report)))
-}
-
 /// Nextclade WebAssembly module.
 ///
 /// Encapsulates all the Nextclade Rust functionality required for Nextclade Web to operate.
@@ -34,16 +29,20 @@ impl NextcladeWasm {
     let params_raw: NextcladeParamsRaw =
       jserr(json_parse(params).wrap_err_with(|| "When parsing Nextclade params JSON"))?;
 
-    let params: NextcladeParams =
-      jserr(NextcladeParams::from_raw(&params_raw).wrap_err_with(|| "When parsing raw Nextclade params"))?;
+    let inputs: NextcladeParams =
+      jserr(NextcladeParams::from_raw(params_raw).wrap_err_with(|| "When parsing raw Nextclade params"))?;
+
+    // FIXME: pass params from the frontend
+    let params = NextcladeInputParamsOptional::default();
 
-    let nextclade: Nextclade = jserr(Nextclade::new(params).wrap_err_with(|| "When initializing Nextclade runner"))?;
+    let nextclade: Nextclade =
+      jserr(Nextclade::new(inputs, &params).wrap_err_with(|| "When initializing Nextclade runner"))?;
 
     Ok(Self { nextclade })
   }
 
   pub fn parse_query_sequences(qry_fasta_str: &str, callback: &js_sys::Function) -> Result<(), JsError> {
-    let mut reader = jserr(FastaReader::from_str(qry_fasta_str).wrap_err_with(|| "When creating fasta reader"))?;
+    let mut reader = jserr(FastaReader::from_str(&qry_fasta_str).wrap_err_with(|| "When creating fasta reader"))?;
 
     loop {
       let mut record = FastaRecord::default();
@@ -65,14 +64,29 @@ impl NextcladeWasm {
   }
 
   pub fn get_initial_data(&self) -> Result<String, JsError> {
-    let initial_data = jserr(self.nextclade.get_initial_data())?;
+    let initial_data = self.nextclade.get_initial_data();
     jserr(json_stringify(&initial_data, JsonPretty(false)))
   }
 
   /// Runs analysis on one sequence and returns its result. This runs in many webworkers concurrently.
   pub fn analyze(&mut self, input: &str) -> Result<String, JsError> {
     let input: FastaRecord = jserr(json_parse(input).wrap_err("When parsing FASTA record JSON"))?;
-    let result = jserr(self.nextclade.run(&input))?;
+
+    let result = jserr(match self.nextclade.run(&input) {
+      Ok(result) => Ok(NextcladeResult {
+        index: input.index,
+        seq_name: input.seq_name.clone(),
+        result: Some(result),
+        error: None,
+      }),
+      Err(err) => Ok(NextcladeResult {
+        index: input.index,
+        seq_name: input.seq_name.clone(),
+        result: None,
+        error: Some(report_to_string(&err)),
+      }),
+    })?;
+
     jserr(json_stringify(&result, JsonPretty(false)))
   }
 
@@ -192,31 +206,4 @@ impl NextcladeWasm {
       &csv_colum_config,
     ))
   }
-
-  pub fn serialize_insertions_csv(outputs_json_str: &str, errors_json_str: &str) -> Result<String, JsError> {
-    let outputs: Vec<NextcladeOutputs> = jserr(
-      json_parse(outputs_json_str)
-        .wrap_err("When serializing insertions into CSV: When parsing outputs JSON internally"),
-    )?;
-
-    let errors: Vec<NextcladeErrorOutputs> = jserr(
-      json_parse(errors_json_str).wrap_err("When serializing results into CSV: When parsing errors JSON internally"),
-    )?;
-
-    jserr(insertions_to_csv_string(&outputs, &errors))
-  }
-
-  pub fn serialize_errors_csv(errors_json_str: &str) -> Result<String, JsError> {
-    let errors: Vec<ErrorsFromWeb> = jserr(
-      json_parse(errors_json_str).wrap_err("When serializing errors into CSV: When parsing outputs JSON internally"),
-    )?;
-
-    jserr(errors_to_csv_string(&errors))
-  }
-}
-
-#[wasm_bindgen(start)]
-pub fn main() {
-  wasm_logger::init(wasm_logger::Config::default());
-  console_error_panic_hook::set_once();
 }
diff --git a/packages_rs/nextclade-web/src/wasm/mod.rs b/packages_rs/nextclade-web/src/wasm/mod.rs
index 2a043412b..b5f7dee46 100644
--- a/packages_rs/nextclade-web/src/wasm/mod.rs
+++ b/packages_rs/nextclade-web/src/wasm/mod.rs
@@ -1 +1,3 @@
+pub mod jserr;
 pub mod main;
+pub mod seq_autodetect;
diff --git a/packages_rs/nextclade-web/src/wasm/seq_autodetect.rs b/packages_rs/nextclade-web/src/wasm/seq_autodetect.rs
new file mode 100644
index 000000000..1a3fe6c3e
--- /dev/null
+++ b/packages_rs/nextclade-web/src/wasm/seq_autodetect.rs
@@ -0,0 +1,97 @@
+use crate::wasm::jserr::{jserr, jserr2};
+use chrono::Duration;
+use eyre::WrapErr;
+use nextclade::io::fasta::{FastaReader, FastaRecord};
+use nextclade::io::json::json_parse;
+use nextclade::sort::minimizer_index::MinimizerIndexJson;
+use nextclade::sort::minimizer_search::{run_minimizer_search, MinimizerSearchRecord};
+use nextclade::sort::params::NextcladeSeqSortParams;
+use nextclade::utils::datetime::date_now;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::io::Read;
+use std::str::FromStr;
+use wasm_bindgen::prelude::*;
+
+#[wasm_bindgen]
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct NextcladeSeqAutodetectWasmParams {
+  batch_interval_ms: i64,
+  max_batch_size: usize,
+}
+
+impl Default for NextcladeSeqAutodetectWasmParams {
+  fn default() -> Self {
+    Self {
+      batch_interval_ms: 500,
+      max_batch_size: 100,
+    }
+  }
+}
+
+#[wasm_bindgen]
+pub struct NextcladeSeqAutodetectWasm {
+  minimizer_index: MinimizerIndexJson,
+  run_params: NextcladeSeqAutodetectWasmParams,
+}
+
+#[wasm_bindgen]
+impl NextcladeSeqAutodetectWasm {
+  pub fn new(minimizer_index_json_str: &str, params: &str) -> Result<NextcladeSeqAutodetectWasm, JsError> {
+    let minimizer_index = jserr(MinimizerIndexJson::from_str(minimizer_index_json_str))?;
+    Ok(Self {
+      minimizer_index,
+      run_params: jserr(json_parse(params))?,
+    })
+  }
+
+  pub fn autodetect(&self, qry_fasta_str: &str, callback: &js_sys::Function) -> Result<(), JsError> {
+    let mut reader = jserr(FastaReader::from_str(&qry_fasta_str).wrap_err_with(|| "When creating fasta reader"))?;
+
+    let search_params = NextcladeSeqSortParams::default();
+
+    let mut batch = vec![];
+    let mut last_flush = date_now();
+
+    loop {
+      let mut fasta_record = FastaRecord::default();
+      jserr(reader.read(&mut fasta_record).wrap_err("When reading a fasta record"))?;
+      if fasta_record.is_empty() {
+        break;
+      }
+
+      let result = jserr(
+        run_minimizer_search(&fasta_record, &self.minimizer_index, &search_params).wrap_err_with(|| {
+          format!(
+            "When processing sequence #{} '{}'",
+            fasta_record.index, fasta_record.seq_name
+          )
+        }),
+      )?;
+
+      batch.push(MinimizerSearchRecord { fasta_record, result });
+
+      if date_now() - last_flush >= Duration::milliseconds(self.run_params.batch_interval_ms)
+        || batch.len() >= self.run_params.max_batch_size
+      {
+        self.flush_batch(callback, &mut batch)?;
+        last_flush = date_now();
+      }
+    }
+
+    self.flush_batch(callback, &mut batch)?;
+
+    Ok(())
+  }
+
+  fn flush_batch(&self, callback: &js_sys::Function, batch: &mut Vec<MinimizerSearchRecord>) -> Result<(), JsError> {
+    if batch.is_empty() {
+      return Ok(());
+    }
+    let result_js = serde_wasm_bindgen::to_value(&batch)?;
+    jserr2(callback.call1(&JsValue::null(), &result_js))?;
+    batch.clear();
+    Ok(())
+  }
+}
diff --git a/packages_rs/nextclade-web/src/workers/ExportThread.ts b/packages_rs/nextclade-web/src/workers/ExportThread.ts
index 65356ca51..e8d0bf198 100644
--- a/packages_rs/nextclade-web/src/workers/ExportThread.ts
+++ b/packages_rs/nextclade-web/src/workers/ExportThread.ts
@@ -1,5 +1,5 @@
 import { CladeNodeAttrDesc } from 'auspice'
-import type { AaMotifsDesc, AnalysisError, AnalysisResult, ErrorsFromWeb, PhenotypeAttrDesc } from 'src/types'
+import type { AaMotifsDesc, AnalysisError, AnalysisResult, PhenotypeAttrDesc } from 'src/types'
 import type { NextcladeWasmWorker } from 'src/workers/nextcladeWasm.worker'
 import { spawn } from 'src/workers/spawn'
 import { CsvColumnConfig } from 'src/types'
@@ -64,14 +64,6 @@ export class ExportWorker {
     return this.thread.serializeResultsNdjson(results, errors)
   }
 
-  public async serializeInsertionsCsv(results: AnalysisResult[], errors: AnalysisError[]) {
-    return this.thread.serializeInsertionsCsv(results, errors)
-  }
-
-  public async serializeErrorsCsv(errors: ErrorsFromWeb[]) {
-    return this.thread.serializeErrorsCsv(errors)
-  }
-
   private async destroy() {
     await this.thread.destroy()
   }
diff --git a/packages_rs/nextclade-web/src/workers/launchAnalysis.ts b/packages_rs/nextclade-web/src/workers/launchAnalysis.ts
index ba4d9e8f4..f7af20736 100644
--- a/packages_rs/nextclade-web/src/workers/launchAnalysis.ts
+++ b/packages_rs/nextclade-web/src/workers/launchAnalysis.ts
@@ -1,13 +1,11 @@
 import { concurrent } from 'fasy'
 import { isEmpty, merge } from 'lodash'
-
 import type {
   AlgorithmInput,
   Dataset,
   FastaRecordId,
   NextcladeResult,
   CsvColumnConfig,
-  NextcladeParams,
   NextcladeParamsRaw,
   AnalysisInitialData,
   OutputTrees,
@@ -22,7 +20,6 @@ export interface LaunchAnalysisInputs {
   refSeq: Promise<AlgorithmInput | undefined>
   geneMap: Promise<AlgorithmInput | undefined>
   tree: Promise<AlgorithmInput | undefined>
-  qcConfig: Promise<AlgorithmInput | undefined>
   virusProperties: Promise<AlgorithmInput | undefined>
 }
 
@@ -36,15 +33,6 @@ export interface LaunchAnalysisCallbacks {
   onComplete: () => void
 }
 
-/** Maps input field names to the dataset field names, so that we know which one to take */
-const DATASET_FILE_NAME_MAPPING: Record<keyof LaunchAnalysisInputs, string> = {
-  refSeq: 'reference.fasta',
-  geneMap: 'genemap.gff',
-  tree: 'tree.json',
-  qcConfig: 'qc.json',
-  virusProperties: 'virus_properties.json',
-}
-
 export async function launchAnalysis(
   qryFastaInputs: Promise<AlgorithmInput[]>,
   paramInputs: LaunchAnalysisInputs,
@@ -100,7 +88,7 @@ export async function launchAnalysis(
   }
 }
 
-async function getQueryFasta(inputs: AlgorithmInput[]) {
+export async function getQueryFasta(inputs: AlgorithmInput[]) {
   if (isEmpty(inputs)) {
     throw new Error('Sequence fasta data is not available, but required')
   }
@@ -109,29 +97,32 @@ async function getQueryFasta(inputs: AlgorithmInput[]) {
   return contents.join('\n')
 }
 
-/** Typed output of Object.entries(), assuming all fields have the same type */
-type Entry<T, V> = [keyof T, V]
-
-type LaunchAnalysisInputsEntry = Entry<LaunchAnalysisInputs, Promise<AlgorithmInput | undefined>>
-
 /** Resolves all param inputs into strings */
 async function getParams(paramInputs: LaunchAnalysisInputs, dataset: Dataset): Promise<NextcladeParamsRaw> {
-  const paramInputsEntries = Object.entries(paramInputs) as LaunchAnalysisInputsEntry[]
+  const entries = [
+    { key: 'geneMap', input: paramInputs.geneMap, datasetFileUrl: dataset.files.genomeAnnotation },
+    { key: 'refSeq', input: paramInputs.refSeq, datasetFileUrl: dataset.files.reference },
+    { key: 'tree', input: paramInputs.tree, datasetFileUrl: dataset.files.treeJson },
+    { key: 'virusProperties', input: paramInputs.virusProperties, datasetFileUrl: dataset.files.pathogenJson },
+  ]
 
   return Object.fromEntries(
-    await concurrent.map(async ([key, input]: LaunchAnalysisInputsEntry): Promise<Entry<NextcladeParams, string>> => {
-      const datasetKey = DATASET_FILE_NAME_MAPPING[key]
-      const content = await resolveInput(await input, dataset.files[datasetKey])
-      return [key as keyof NextcladeParams, content]
-    }, paramInputsEntries),
+    await concurrent.map(async ({ key, input, datasetFileUrl }) => {
+      return [key, await resolveInput(await input, datasetFileUrl)]
+    }, entries),
   ) as unknown as NextcladeParamsRaw
 }
 
-async function resolveInput(input: AlgorithmInput | undefined, datasetFileUrl: string) {
+async function resolveInput(input: AlgorithmInput | undefined, datasetFileUrl: string | undefined) {
   // If data is provided explicitly, load it
   if (input) {
     return input.getContent()
   }
+
   // Otherwise fetch corresponding file from the dataset
-  return axiosFetchRaw(datasetFileUrl)
+  if (datasetFileUrl) {
+    return axiosFetchRaw(datasetFileUrl)
+  }
+
+  return undefined
 }
diff --git a/packages_rs/nextclade-web/src/workers/nextcladeAutodetect.worker.ts b/packages_rs/nextclade-web/src/workers/nextcladeAutodetect.worker.ts
new file mode 100644
index 000000000..f2824f216
--- /dev/null
+++ b/packages_rs/nextclade-web/src/workers/nextcladeAutodetect.worker.ts
@@ -0,0 +1,76 @@
+import 'regenerator-runtime'
+
+import { ErrorInternal } from 'src/helpers/ErrorInternal'
+import { sanitizeError } from 'src/helpers/sanitizeError'
+import { MinimizerIndexJson, MinimizerSearchRecord } from 'src/types'
+import { Observable as ThreadsObservable, Subject } from 'threads/observable'
+import type { Thread } from 'threads'
+import { expose } from 'threads/worker'
+import { NextcladeSeqAutodetectWasm } from 'src/gen/nextclade-wasm'
+
+const gSubject = new Subject<MinimizerSearchRecord[]>()
+
+function onResultParsed(res: MinimizerSearchRecord[]) {
+  gSubject.next(res)
+}
+
+/**
+ * Keeps the reference to the WebAssembly module. The module is stateful and requires manual initialization
+ * and teardown.
+ * This could be a class instance, but unfortunately we cannot pass classes to/from WebWorkers (yet?).
+ */
+let nextcladeAutodetect: NextcladeSeqAutodetectWasm | undefined
+
+/** Creates the underlying WebAssembly module. */
+async function create(minimizerIndexJsonStr: MinimizerIndexJson) {
+  nextcladeAutodetect = NextcladeSeqAutodetectWasm.new(
+    JSON.stringify(minimizerIndexJsonStr),
+    JSON.stringify({ batchIntervalMs: 250, maxBatchSize: 1000 }),
+  )
+}
+
+/** Destroys the underlying WebAssembly module. */
+async function destroy() {
+  if (!nextcladeAutodetect) {
+    return
+  }
+
+  nextcladeAutodetect.free()
+  nextcladeAutodetect = undefined
+}
+
+async function autodetect(fasta: string): Promise<void> {
+  if (!nextcladeAutodetect) {
+    throw new ErrorModuleNotInitialized('autodetect')
+  }
+
+  try {
+    nextcladeAutodetect.autodetect(fasta, onResultParsed)
+  } catch (error: unknown) {
+    gSubject.error(sanitizeError(error))
+  }
+
+  gSubject.complete()
+}
+
+const worker = {
+  create,
+  destroy,
+  autodetect,
+  values(): ThreadsObservable<MinimizerSearchRecord[]> {
+    return ThreadsObservable.from(gSubject)
+  },
+}
+
+expose(worker)
+
+export type NextcladeSeqAutodetectWasmWorker = typeof worker
+export type NextcladeSeqAutodetectWasmThread = NextcladeSeqAutodetectWasmWorker & Thread
+
+export class ErrorModuleNotInitialized extends ErrorInternal {
+  constructor(fnName: string) {
+    super(
+      `This WebWorker module has not been initialized yet. When calling module.${fnName} Make sure to call 'module.create()' function.`,
+    )
+  }
+}
diff --git a/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts b/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts
index 9c1ed86b2..df51a0a02 100644
--- a/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts
+++ b/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts
@@ -11,7 +11,6 @@ import type {
   AnalysisError,
   AnalysisResult,
   CsvColumnConfig,
-  ErrorsFromWeb,
   FastaRecord,
   NextcladeParamsRaw,
   NextcladeResult,
@@ -76,8 +75,8 @@ async function getInitialData(): Promise<AnalysisInitialData> {
   if (!nextcladeWasm) {
     throw new ErrorModuleNotInitialized('getInitialData')
   }
-  const aaa = nextcladeWasm.get_initial_data()
-  const initialData = JSON.parse(aaa) as AnalysisInitialData
+  const initialDataStr = nextcladeWasm.get_initial_data()
+  const initialData = JSON.parse(initialDataStr) as AnalysisInitialData
   return {
     ...initialData,
     geneMap: prepareGeneMap(initialData.geneMap),
@@ -160,14 +159,6 @@ export async function serializeResultsCsv(
   )
 }
 
-async function serializeInsertionsCsv(results: AnalysisResult[], errors: AnalysisError[]) {
-  return NextcladeWasm.serialize_insertions_csv(JSON.stringify(results), JSON.stringify(errors))
-}
-
-async function serializeErrorsCsv(errors: ErrorsFromWeb[]) {
-  return NextcladeWasm.serialize_errors_csv(JSON.stringify(errors))
-}
-
 const worker = {
   create,
   destroy,
@@ -179,8 +170,6 @@ const worker = {
   serializeResultsJson,
   serializeResultsCsv,
   serializeResultsNdjson,
-  serializeInsertionsCsv,
-  serializeErrorsCsv,
   values(): ThreadsObservable<FastaRecord> {
     return ThreadsObservable.from(gSubject)
   },
diff --git a/packages_rs/nextclade-web/yarn.lock b/packages_rs/nextclade-web/yarn.lock
index 1f8cc582f..df7774898 100644
--- a/packages_rs/nextclade-web/yarn.lock
+++ b/packages_rs/nextclade-web/yarn.lock
@@ -5508,7 +5508,7 @@ commander@10.0.1:
   resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06"
   integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==
 
-commander@^2.18.0, commander@^2.20.0:
+commander@^2.18.0, commander@^2.19.0, commander@^2.20.0:
   version "2.20.3"
   resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
   integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
@@ -8747,6 +8747,30 @@ hast-util-raw@6.0.1:
     xtend "^4.0.0"
     zwitch "^1.0.0"
 
+hast-util-raw@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/hast-util-raw/-/hast-util-raw-6.1.0.tgz#e16a3c2642f65cc7c480c165400a40d604ab75d0"
+  integrity sha512-5FoZLDHBpka20OlZZ4I/+RBw5piVQ8iI1doEvffQhx5CbCyTtP8UCq8Tw6NmTAMtXgsQxmhW7Ly8OdFre5/YMQ==
+  dependencies:
+    "@types/hast" "^2.0.0"
+    hast-util-from-parse5 "^6.0.0"
+    hast-util-to-parse5 "^6.0.0"
+    html-void-elements "^1.0.0"
+    parse5 "^6.0.0"
+    unist-util-position "^3.0.0"
+    unist-util-visit "^2.0.0"
+    vfile "^4.0.0"
+    web-namespaces "^1.0.0"
+    xtend "^4.0.0"
+    zwitch "^1.0.0"
+
+hast-util-sanitize@^3.0.0:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/hast-util-sanitize/-/hast-util-sanitize-3.0.2.tgz#b0b783220af528ba8fe6999f092d138908678520"
+  integrity sha512-+2I0x2ZCAyiZOO/sb4yNLFmdwPBnyJ4PBkVTUMKMqBwYNA+lXSgOmoRXlJFazoyid9QPogRRKgKhVEodv181sA==
+  dependencies:
+    xtend "^4.0.0"
+
 hast-util-to-parse5@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/hast-util-to-parse5/-/hast-util-to-parse5-6.0.0.tgz#1ec44650b631d72952066cea9b1445df699f8479"
@@ -9166,6 +9190,11 @@ irregular-plurals@^1.0.0:
   resolved "https://registry.yarnpkg.com/irregular-plurals/-/irregular-plurals-1.4.0.tgz#2ca9b033651111855412f16be5d77c62a458a766"
   integrity sha1-LKmwM2UREYVUEvFr5dd8YqRYp2Y=
 
+is-absolute-url@3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-3.0.3.tgz#96c6a22b6a23929b11ea0afb1836c36ad4a5d698"
+  integrity sha512-opmNIX7uFnS96NtPmhWQgQx6/NYFgsUXYMllcfzwWKUMwfo8kku1TvE6hkNcH+Q1ts5cMVrsY7j0bxXQDciu9Q==
+
 is-absolute@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/is-absolute/-/is-absolute-1.0.0.tgz#395e1ae84b11f26ad1795e73c17378e48a301576"
@@ -10441,6 +10470,13 @@ jws@^4.0.0:
     jwa "^2.0.0"
     safe-buffer "^5.0.1"
 
+katex@^0.12.0:
+  version "0.12.0"
+  resolved "https://registry.yarnpkg.com/katex/-/katex-0.12.0.tgz#2fb1c665dbd2b043edcf8a1f5c555f46beaa0cb9"
+  integrity sha512-y+8btoc/CK70XqcHqjxiGWBOeIL8upbS0peTPXTvgrh21n1RiWWcIpSWM+4uXq+IAgNh9YYQWdc7LVDPDAEEAg==
+  dependencies:
+    commander "^2.19.0"
+
 keyv@^3.0.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/keyv/-/keyv-3.1.0.tgz#ecc228486f69991e49e9476485a5be1e8fc5c4d9"
@@ -10902,6 +10938,13 @@ markdown-escapes@^1.0.0:
   resolved "https://registry.yarnpkg.com/markdown-escapes/-/markdown-escapes-1.0.4.tgz#c95415ef451499d7602b91095f3c8e8975f78535"
   integrity sha512-8z4efJYk43E0upd0NbVXwgSTQs6cT3T06etieCMEg7dRbzCbxUCK/GHlX8mhHRDcp+OLlHkPKsvqQTCvsRl2cg==
 
+markdown-table@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-2.0.0.tgz#194a90ced26d31fe753d8b9434430214c011865b"
+  integrity sha512-Ezda85ToJUBhM6WGaG6veasyym+Tbs3cMAw/ZhOPqXiYsr0jgocBV3j3nx+4lk47plLlIqjwuTm/ywVI+zjJ/A==
+  dependencies:
+    repeat-string "^1.0.0"
+
 marked@4.0.14, marked@^0.7.0:
   version "4.0.14"
   resolved "https://registry.yarnpkg.com/marked/-/marked-4.0.14.tgz#7a3a5fa5c80580bac78c1ed2e3b84d7bd6fc3870"
@@ -10951,6 +10994,15 @@ mdast-util-definitions@^4.0.0:
   dependencies:
     unist-util-visit "^2.0.0"
 
+mdast-util-find-and-replace@^1.1.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/mdast-util-find-and-replace/-/mdast-util-find-and-replace-1.1.1.tgz#b7db1e873f96f66588c321f1363069abf607d1b5"
+  integrity sha512-9cKl33Y21lyckGzpSmEQnIDjEfeeWelN5s1kUW1LwdB0Fkuq2u+4GdqcGEygYxJE8GVqCl0741bYXHgamfWAZA==
+  dependencies:
+    escape-string-regexp "^4.0.0"
+    unist-util-is "^4.0.0"
+    unist-util-visit-parents "^3.0.0"
+
 mdast-util-from-markdown@^0.8.0:
   version "0.8.5"
   resolved "https://registry.yarnpkg.com/mdast-util-from-markdown/-/mdast-util-from-markdown-0.8.5.tgz#d1ef2ca42bc377ecb0463a987910dae89bd9a28c"
@@ -10962,6 +11014,57 @@ mdast-util-from-markdown@^0.8.0:
     parse-entities "^2.0.0"
     unist-util-stringify-position "^2.0.0"
 
+mdast-util-gfm-autolink-literal@^0.1.0:
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-0.1.3.tgz#9c4ff399c5ddd2ece40bd3b13e5447d84e385fb7"
+  integrity sha512-GjmLjWrXg1wqMIO9+ZsRik/s7PLwTaeCHVB7vRxUwLntZc8mzmTsLVr6HW1yLokcnhfURsn5zmSVdi3/xWWu1A==
+  dependencies:
+    ccount "^1.0.0"
+    mdast-util-find-and-replace "^1.1.0"
+    micromark "^2.11.3"
+
+mdast-util-gfm-strikethrough@^0.2.0:
+  version "0.2.3"
+  resolved "https://registry.yarnpkg.com/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-0.2.3.tgz#45eea337b7fff0755a291844fbea79996c322890"
+  integrity sha512-5OQLXpt6qdbttcDG/UxYY7Yjj3e8P7X16LzvpX8pIQPYJ/C2Z1qFGMmcw+1PZMUM3Z8wt8NRfYTvCni93mgsgA==
+  dependencies:
+    mdast-util-to-markdown "^0.6.0"
+
+mdast-util-gfm-table@^0.1.0:
+  version "0.1.6"
+  resolved "https://registry.yarnpkg.com/mdast-util-gfm-table/-/mdast-util-gfm-table-0.1.6.tgz#af05aeadc8e5ee004eeddfb324b2ad8c029b6ecf"
+  integrity sha512-j4yDxQ66AJSBwGkbpFEp9uG/LS1tZV3P33fN1gkyRB2LoRL+RR3f76m0HPHaby6F4Z5xr9Fv1URmATlRRUIpRQ==
+  dependencies:
+    markdown-table "^2.0.0"
+    mdast-util-to-markdown "~0.6.0"
+
+mdast-util-gfm-task-list-item@^0.1.0:
+  version "0.1.6"
+  resolved "https://registry.yarnpkg.com/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-0.1.6.tgz#70c885e6b9f543ddd7e6b41f9703ee55b084af10"
+  integrity sha512-/d51FFIfPsSmCIRNp7E6pozM9z1GYPIkSy1urQ8s/o4TC22BZ7DqfHFWiqBD23bc7J3vV1Fc9O4QIHBlfuit8A==
+  dependencies:
+    mdast-util-to-markdown "~0.6.0"
+
+mdast-util-gfm@^0.1.0:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/mdast-util-gfm/-/mdast-util-gfm-0.1.2.tgz#8ecddafe57d266540f6881f5c57ff19725bd351c"
+  integrity sha512-NNkhDx/qYcuOWB7xHUGWZYVXvjPFFd6afg6/e2g+SV4r9q5XUcCbV4Wfa3DLYIiD+xAEZc6K4MGaE/m0KDcPwQ==
+  dependencies:
+    mdast-util-gfm-autolink-literal "^0.1.0"
+    mdast-util-gfm-strikethrough "^0.2.0"
+    mdast-util-gfm-table "^0.1.0"
+    mdast-util-gfm-task-list-item "^0.1.0"
+    mdast-util-to-markdown "^0.6.1"
+
+mdast-util-math@^0.1.0:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/mdast-util-math/-/mdast-util-math-0.1.2.tgz#629a0793bd8822432917e5ddda5279492390cc2b"
+  integrity sha512-fogAitds+wH+QRas78Yr1TwmQGN4cW/G2WRw5ePuNoJbBSPJCxIOCE8MTzHgWHVSpgkRaPQTgfzXRE1CrwWSlg==
+  dependencies:
+    longest-streak "^2.0.0"
+    mdast-util-to-markdown "^0.6.0"
+    repeat-string "^1.0.0"
+
 mdast-util-to-hast@10.0.1:
   version "10.0.1"
   resolved "https://registry.yarnpkg.com/mdast-util-to-hast/-/mdast-util-to-hast-10.0.1.tgz#0cfc82089494c52d46eb0e3edb7a4eb2aea021eb"
@@ -10976,7 +11079,21 @@ mdast-util-to-hast@10.0.1:
     unist-util-position "^3.0.0"
     unist-util-visit "^2.0.0"
 
-mdast-util-to-markdown@^0.6.0:
+mdast-util-to-hast@^10.2.0:
+  version "10.2.0"
+  resolved "https://registry.yarnpkg.com/mdast-util-to-hast/-/mdast-util-to-hast-10.2.0.tgz#61875526a017d8857b71abc9333942700b2d3604"
+  integrity sha512-JoPBfJ3gBnHZ18icCwHR50orC9kNH81tiR1gs01D8Q5YpV6adHNO9nKNuFBCJQ941/32PT1a63UF/DitmS3amQ==
+  dependencies:
+    "@types/mdast" "^3.0.0"
+    "@types/unist" "^2.0.0"
+    mdast-util-definitions "^4.0.0"
+    mdurl "^1.0.0"
+    unist-builder "^2.0.0"
+    unist-util-generated "^1.0.0"
+    unist-util-position "^3.0.0"
+    unist-util-visit "^2.0.0"
+
+mdast-util-to-markdown@^0.6.0, mdast-util-to-markdown@^0.6.1, mdast-util-to-markdown@~0.6.0:
   version "0.6.5"
   resolved "https://registry.yarnpkg.com/mdast-util-to-markdown/-/mdast-util-to-markdown-0.6.5.tgz#b33f67ca820d69e6cc527a93d4039249b504bebe"
   integrity sha512-XeV9sDE7ZlOQvs45C9UKMtfTcctcaj/pGwH8YLbMHoMOXNNCn2LsqVQOqrF1+/NU8lKDAqozme9SCXWyo9oAcQ==
@@ -11135,7 +11252,60 @@ methods@~1.1.2:
   resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee"
   integrity sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=
 
-micromark@~2.11.0:
+micromark-extension-gfm-autolink-literal@~0.5.0:
+  version "0.5.7"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-0.5.7.tgz#53866c1f0c7ef940ae7ca1f72c6faef8fed9f204"
+  integrity sha512-ePiDGH0/lhcngCe8FtH4ARFoxKTUelMp4L7Gg2pujYD5CSMb9PbblnyL+AAMud/SNMyusbS2XDSiPIRcQoNFAw==
+  dependencies:
+    micromark "~2.11.3"
+
+micromark-extension-gfm-strikethrough@~0.6.5:
+  version "0.6.5"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-0.6.5.tgz#96cb83356ff87bf31670eefb7ad7bba73e6514d1"
+  integrity sha512-PpOKlgokpQRwUesRwWEp+fHjGGkZEejj83k9gU5iXCbDG+XBA92BqnRKYJdfqfkrRcZRgGuPuXb7DaK/DmxOhw==
+  dependencies:
+    micromark "~2.11.0"
+
+micromark-extension-gfm-table@~0.4.0:
+  version "0.4.3"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm-table/-/micromark-extension-gfm-table-0.4.3.tgz#4d49f1ce0ca84996c853880b9446698947f1802b"
+  integrity sha512-hVGvESPq0fk6ALWtomcwmgLvH8ZSVpcPjzi0AjPclB9FsVRgMtGZkUcpE0zgjOCFAznKepF4z3hX8z6e3HODdA==
+  dependencies:
+    micromark "~2.11.0"
+
+micromark-extension-gfm-tagfilter@~0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-0.3.0.tgz#d9f26a65adee984c9ccdd7e182220493562841ad"
+  integrity sha512-9GU0xBatryXifL//FJH+tAZ6i240xQuFrSL7mYi8f4oZSbc+NvXjkrHemeYP0+L4ZUT+Ptz3b95zhUZnMtoi/Q==
+
+micromark-extension-gfm-task-list-item@~0.3.0:
+  version "0.3.3"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-0.3.3.tgz#d90c755f2533ed55a718129cee11257f136283b8"
+  integrity sha512-0zvM5iSLKrc/NQl84pZSjGo66aTGd57C1idmlWmE87lkMcXrTxg1uXa/nXomxJytoje9trP0NDLvw4bZ/Z/XCQ==
+  dependencies:
+    micromark "~2.11.0"
+
+micromark-extension-gfm@^0.3.0:
+  version "0.3.3"
+  resolved "https://registry.yarnpkg.com/micromark-extension-gfm/-/micromark-extension-gfm-0.3.3.tgz#36d1a4c089ca8bdfd978c9bd2bf1a0cb24e2acfe"
+  integrity sha512-oVN4zv5/tAIA+l3GbMi7lWeYpJ14oQyJ3uEim20ktYFAcfX1x3LNlFGGlmrZHt7u9YlKExmyJdDGaTt6cMSR/A==
+  dependencies:
+    micromark "~2.11.0"
+    micromark-extension-gfm-autolink-literal "~0.5.0"
+    micromark-extension-gfm-strikethrough "~0.6.5"
+    micromark-extension-gfm-table "~0.4.0"
+    micromark-extension-gfm-tagfilter "~0.3.0"
+    micromark-extension-gfm-task-list-item "~0.3.0"
+
+micromark-extension-math@^0.1.0:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/micromark-extension-math/-/micromark-extension-math-0.1.2.tgz#5d7bb2b86018da4a758c05f3991664430ee4d711"
+  integrity sha512-ZJXsT2eVPM8VTmcw0CPSDeyonOn9SziGK3Z+nkf9Vb6xMPeU+4JMEnO6vzDL10562Favw8Vste74f54rxJ/i6Q==
+  dependencies:
+    katex "^0.12.0"
+    micromark "~2.11.0"
+
+micromark@^2.11.3, micromark@~2.11.0, micromark@~2.11.3:
   version "2.11.4"
   resolved "https://registry.yarnpkg.com/micromark/-/micromark-2.11.4.tgz#d13436138eea826383e822449c9a5c50ee44665a"
   integrity sha512-+WoovN/ppKolQOFIAajxi7Lu9kInbPxFuTBVEavFcL8eAfVstoc5MocPmqBeAdBOJV00uaVjegzH4+MA0DN/uA==
@@ -11345,6 +11515,11 @@ nano-time@1.0.0:
   dependencies:
     big-integer "^1.6.16"
 
+nanoid@3.3.6:
+  version "3.3.6"
+  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c"
+  integrity sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==
+
 nanoid@^3.1.30, nanoid@^3.3.1:
   version "3.3.3"
   resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.3.tgz#fd8e8b7aa761fe807dba2d1b98fb7241bb724a25"
@@ -13104,10 +13279,10 @@ react-i18next@11.3.3, react-i18next@^11.3.3:
     "@babel/runtime" "^7.3.1"
     html-parse-stringify2 "2.0.1"
 
-react-icons@4.3.1:
-  version "4.3.1"
-  resolved "https://registry.yarnpkg.com/react-icons/-/react-icons-4.3.1.tgz#2fa92aebbbc71f43d2db2ed1aed07361124e91ca"
-  integrity sha512-cB10MXLTs3gVuXimblAdI71jrJx8njrJZmNMEMC+sQu5B/BIOmlsAjskdqpn81y8UBVEGuHODd7/ci5DvoSzTQ==
+react-icons@4.11.0:
+  version "4.11.0"
+  resolved "https://registry.yarnpkg.com/react-icons/-/react-icons-4.11.0.tgz#4b0e31c9bfc919608095cc429c4f1846f4d66c65"
+  integrity sha512-V+4khzYcE5EBk/BvcuYRq6V/osf11ODUM2J8hg2FDSswRrGvqiYUYPRy4OdrWaQOBj4NcpJfmHZLNaD+VH0TyA==
 
 react-icons@^3.9.0:
   version "3.11.0"
@@ -13133,7 +13308,7 @@ react-is@^16.10.2, react-is@^16.13.1, react-is@^16.7.0:
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
   integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
 
-react-is@^17.0.1, react-is@^17.0.2:
+react-is@^17.0.0, react-is@^17.0.1, react-is@^17.0.2:
   version "17.0.2"
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0"
   integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==
@@ -13153,6 +13328,25 @@ react-loader-spinner@5.1.4:
   resolved "https://registry.yarnpkg.com/react-loader-spinner/-/react-loader-spinner-5.1.4.tgz#1f08e21aa6b721b6c303658a294f4afbf8ad8d05"
   integrity sha512-qjS/5/+tWX4gPJXdA1LB2HzTHv/CX/2ic+DBLLbacM+S7gVTfS7L7gJ7IOZMzVXETaiItdCUh3f8Y2GoYG5o8Q==
 
+react-markdown@6.0.3:
+  version "6.0.3"
+  resolved "https://registry.yarnpkg.com/react-markdown/-/react-markdown-6.0.3.tgz#625ec767fa321d91801129387e7d31ee0cb99254"
+  integrity sha512-kQbpWiMoBHnj9myLlmZG9T1JdoT/OEyHK7hqM6CqFT14MAkgWiWBUYijLyBmxbntaN6dCDicPcUhWhci1QYodg==
+  dependencies:
+    "@types/hast" "^2.0.0"
+    "@types/unist" "^2.0.3"
+    comma-separated-tokens "^1.0.0"
+    prop-types "^15.7.2"
+    property-information "^5.3.0"
+    react-is "^17.0.0"
+    remark-parse "^9.0.0"
+    remark-rehype "^8.0.0"
+    space-separated-tokens "^1.1.0"
+    style-to-object "^0.3.0"
+    unified "^9.0.0"
+    unist-util-visit "^2.0.0"
+    vfile "^4.0.0"
+
 react-no-ssr@1.1.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/react-no-ssr/-/react-no-ssr-1.1.0.tgz#313b48d2e26020f969ed98e472f10481604e3cc8"
@@ -13520,10 +13714,10 @@ recoil-persist@4.2.0:
   resolved "https://registry.yarnpkg.com/recoil-persist/-/recoil-persist-4.2.0.tgz#9fbb4a8c158cbb83ceb9dedf795aee24ed15395d"
   integrity sha512-MHVfML9GxJP3RpkKR4F5rp7DtvzIvjWhowtMao/b7h2k4afMio/4sMAdUtltIrDaeVegH0Iga8Sx5XQ3oD7CzA==
 
-recoil@0.7.6:
-  version "0.7.6"
-  resolved "https://registry.yarnpkg.com/recoil/-/recoil-0.7.6.tgz#75297ecd70bbfeeb72e861aa6141a86bb6dfcd5e"
-  integrity sha512-hsBEw7jFdpBCY/tu2GweiyaqHKxVj6EqF2/SfrglbKvJHhpN57SANWvPW+gE90i3Awi+A5gssOd3u+vWlT+g7g==
+recoil@0.7.7:
+  version "0.7.7"
+  resolved "https://registry.yarnpkg.com/recoil/-/recoil-0.7.7.tgz#c5f2c843224384c9c09e4a62c060fb4c1454dc8e"
+  integrity sha512-8Og5KPQW9LwC577Vc7Ug2P0vQshkv1y3zG3tSSkWMqkWSwHmE+by06L8JtnGocjW6gcCvfwB3YtrJG6/tWivNQ==
   dependencies:
     hamt_plus "1.0.2"
 
@@ -13661,6 +13855,20 @@ regjsparser@^0.8.2:
   dependencies:
     jsesc "~0.5.0"
 
+rehype-raw@5.1.0:
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/rehype-raw/-/rehype-raw-5.1.0.tgz#66d5e8d7188ada2d31bc137bc19a1000cf2c6b7e"
+  integrity sha512-MDvHAb/5mUnif2R+0IPCYJU8WjHa9UzGtM/F4AVy5GixPlDZ1z3HacYy4xojDU+uBa+0X/3PIfyQI26/2ljJNA==
+  dependencies:
+    hast-util-raw "^6.1.0"
+
+rehype-sanitize@4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/rehype-sanitize/-/rehype-sanitize-4.0.0.tgz#b5241cf66bcedc49cd4e924a5f7a252f00a151ad"
+  integrity sha512-ZCr/iQRr4JeqPjun5i9CHHILVY7i45VnLu1CkkibDrSyFQ7dTLSvw8OIQpHhS4RSh9h/9GidxFw1bRb0LOxIag==
+  dependencies:
+    hast-util-sanitize "^3.0.0"
+
 rehype-slug@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/rehype-slug/-/rehype-slug-4.0.1.tgz#313274501cffa997bd52dd57bf2da5851959747a"
@@ -13695,6 +13903,14 @@ remark-footnotes@2.0.0:
   resolved "https://registry.yarnpkg.com/remark-footnotes/-/remark-footnotes-2.0.0.tgz#9001c4c2ffebba55695d2dd80ffb8b82f7e6303f"
   integrity sha512-3Clt8ZMH75Ayjp9q4CorNeyjwIxHFcTkaektplKGl2A1jNGEUey8cKL0ZC5vJwfcD5GFGsNLImLG/NGzWIzoMQ==
 
+remark-gfm@1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/remark-gfm/-/remark-gfm-1.0.0.tgz#9213643001be3f277da6256464d56fd28c3b3c0d"
+  integrity sha512-KfexHJCiqvrdBZVbQ6RopMZGwaXz6wFJEfByIuEwGf0arvITHjiKKZ1dpXujjH9KZdm1//XJQwgfnJ3lmXaDPA==
+  dependencies:
+    mdast-util-gfm "^0.1.0"
+    micromark-extension-gfm "^0.3.0"
+
 remark-images@2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/remark-images/-/remark-images-2.0.0.tgz#7621a406364c3a0a6e4250c3ee63909cc14a2388"
@@ -13704,10 +13920,13 @@ remark-images@2.0.0:
     unist-util-is "^4.0.0"
     unist-util-visit-parents "^3.0.0"
 
-remark-math@3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/remark-math/-/remark-math-3.0.1.tgz#85a02a15b15cad34b89a27244d4887b3a95185bb"
-  integrity sha512-epT77R/HK0x7NqrWHdSV75uNLwn8g9qTyMqCRCDujL0vj/6T6+yhdrR7mjELWtkse+Fw02kijAaBuVcHBor1+Q==
+remark-math@4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/remark-math/-/remark-math-4.0.0.tgz#494ddd50766555ad2332e3afca7796a76452256f"
+  integrity sha512-lH7SoQenXtQrvL0bm+mjZbvOk//YWNuyR+MxV18Qyv8rgFmMEGNuB0TSCQDkoDaiJ40FCnG8lxErc/zhcedYbw==
+  dependencies:
+    mdast-util-math "^0.1.0"
+    micromark-extension-math "^0.1.0"
 
 remark-mdx@1.6.22:
   version "1.6.22"
@@ -13752,6 +13971,13 @@ remark-parse@^9.0.0:
   dependencies:
     mdast-util-from-markdown "^0.8.0"
 
+remark-rehype@^8.0.0:
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/remark-rehype/-/remark-rehype-8.1.0.tgz#610509a043484c1e697437fa5eb3fd992617c945"
+  integrity sha512-EbCu9kHgAxKmW1yEYjx3QafMyGY3q8noUbNUI5xyKbaFP89wbhDrKxyIQNukNYthzjNHZu6J7hwFg7hRm1svYA==
+  dependencies:
+    mdast-util-to-hast "^10.2.0"
+
 remark-slug@6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/remark-slug/-/remark-slug-6.0.0.tgz#2b54a14a7b50407a5e462ac2f376022cce263e2c"
@@ -14429,7 +14655,7 @@ source-map@~0.8.0-beta.0:
   dependencies:
     whatwg-url "^7.0.0"
 
-space-separated-tokens@^1.0.0:
+space-separated-tokens@^1.0.0, space-separated-tokens@^1.1.0:
   version "1.1.5"
   resolved "https://registry.yarnpkg.com/space-separated-tokens/-/space-separated-tokens-1.1.5.tgz#85f32c3d10d9682007e917414ddc5c26d1aa6899"
   integrity sha512-q/JSVd1Lptzhf5bkYm4ob4iWPjx0KiRe3sRFBNrVqbJkFaBm5vbbowy1mymoPNLRa52+oadOhJ+K49wsSeSjTA==
@@ -15793,7 +16019,7 @@ unified@9.2.0:
     trough "^1.0.0"
     vfile "^4.0.0"
 
-unified@^9.1.0:
+unified@^9.0.0, unified@^9.1.0:
   version "9.2.2"
   resolved "https://registry.yarnpkg.com/unified/-/unified-9.2.2.tgz#67649a1abfc3ab85d2969502902775eb03146975"
   integrity sha512-Sg7j110mtefBD+qunSLO1lqOEKdrwBFBrR6Qd8f4uwkhWNlbkaqwHse6e7QvD3AP/MNoJdEDLaf8OxYyoWgorQ==
diff --git a/packages_rs/nextclade/Cargo.toml b/packages_rs/nextclade/Cargo.toml
index 7f2fcf0ea..e49b356b5 100644
--- a/packages_rs/nextclade/Cargo.toml
+++ b/packages_rs/nextclade/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "nextclade"
-version = "2.14.0"
+version = "3.0.0-alpha.0"
 description = "Alignment, mutation calling, phylogenetic placement, clade assignment and quality control checks for viral genetic sequences. Library module."
 repository = "https://github.com/nextstrain/nextclade"
 documentation = "https://docs.nextstrain.org/projects/nextclade/en/stable/"
@@ -18,10 +18,9 @@ auto_ops = "=0.3.0"
 bio = "=1.3.1"
 bio-types = "=1.0.0"
 chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] }
-clap = { version = "=4.3.10", features = ["derive"] }
-clap-verbosity-flag = "=2.0.1"
-clap_complete = "=4.3.1"
-clap_complete_fig = "=4.3.1"
+clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] }
+clap_complete = "=4.4.1"
+clap_complete_fig = "=4.4.0"
 color-eyre = "=0.6.2"
 csv = "=1.2.2"
 ctor = "=0.2.2"
@@ -44,17 +43,19 @@ num = "=0.4.0"
 num-traits = "=0.2.15"
 num_cpus = "=1.16.0"
 optfield = "=0.3.0"
+ordered-float = { version = "=3.9.1", features = ["rand", "serde", "schemars"] }
 owo-colors = "=3.5.0"
 pretty_assertions = "=1.3.0"
 rayon = "=1.7.0"
 regex = "=1.8.4"
 schemars = { version = "=0.8.12", features = ["chrono", "either", "enumset", "indexmap"] }
-semver = "=1.0.17"
+semver = { version = "=1.0.17", features = ["serde"] }
 serde = { version = "=1.0.164", features = ["derive"] }
 serde_json = { version = "=1.0.99", features = ["preserve_order", "indexmap", "unbounded_depth"] }
 serde_repr = "=0.1.12"
 serde_stacker = { version = "=0.1.8" }
 serde_yaml = "=0.9.22"
+strsim = "=0.10.0"
 strum = "=0.25.0"
 strum_macros = "=0.25.0"
 tinytemplate = "=1.2.1"
diff --git a/packages_rs/nextclade/benches/bench_create_stripes.rs b/packages_rs/nextclade/benches/bench_create_stripes.rs
index 3a1e67ce8..728485685 100644
--- a/packages_rs/nextclade/benches/bench_create_stripes.rs
+++ b/packages_rs/nextclade/benches/bench_create_stripes.rs
@@ -1,5 +1,5 @@
 use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
-use nextclade::align::seed_alignment::create_stripes;
+use nextclade::align::seed_alignment::create_alignment_band;
 use nextclade::align::seed_match2::SeedMatch2;
 
 pub fn bench_create_stripes(c: &mut Criterion) {
@@ -22,20 +22,19 @@ pub fn bench_create_stripes(c: &mut Criterion) {
   let excess_bandwidth = black_box(2);
   let qry_len = black_box(30);
   let ref_len = black_box(40);
-  let max_indel = black_box(400);
-  let allowed_mismatches = black_box(2);
+  let minimal_bandwidth = black_box(0);
 
   let mut group = c.benchmark_group("create_stripes");
   group.throughput(Throughput::Bytes(qry_len as u64));
   group.bench_function("create_stripes", |b| {
     b.iter(|| {
-      create_stripes(
+      create_alignment_band(
         &seed_matches,
         qry_len,
         ref_len,
         terminal_bandwidth,
         excess_bandwidth,
-        allowed_mismatches,
+        minimal_bandwidth,
       )
     });
   });
diff --git a/packages_rs/nextclade/benches/bench_seed_alignment.rs b/packages_rs/nextclade/benches/bench_seed_alignment.rs
index 9262acdbd..66c7c85ad 100644
--- a/packages_rs/nextclade/benches/bench_seed_alignment.rs
+++ b/packages_rs/nextclade/benches/bench_seed_alignment.rs
@@ -3,14 +3,12 @@ use std::path::PathBuf;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use nextclade::align::params::AlignPairwiseParams;
-use nextclade::align::seed_alignment::seed_alignment;
-use nextclade::align::seed_match2::CodonSpacedIndex;
+use nextclade::align::seed_alignment::create_alignment_band;
+use nextclade::align::seed_match2::{get_seed_matches_maybe_reverse_complement, CodonSpacedIndex, SeedMatchesResult};
 use nextclade::alphabet::nuc::to_nuc_seq;
-use nextclade::gene::gene_map::GeneMap;
 
 pub fn bench_seed_alignment(c: &mut Criterion) {
   let params = AlignPairwiseParams::default();
-  let gene_map = GeneMap::new();
 
   let test_data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test_data");
   let ref_path = test_data_dir.join("reference.fasta");
@@ -21,10 +19,22 @@ pub fn bench_seed_alignment(c: &mut Criterion) {
 
   let seed_index = CodonSpacedIndex::from_sequence(&ref_seq);
 
+  let minimal_bandwidth = black_box(0);
+
   let mut group = c.benchmark_group("seed_alignment");
   group.bench_function("seed_match", |b| {
     b.iter(|| {
-      seed_alignment(&qry_seq, &ref_seq, &seed_index, &params).unwrap();
+      let SeedMatchesResult { seed_matches, .. } =
+        get_seed_matches_maybe_reverse_complement(&qry_seq, &ref_seq, &seed_index, &params).unwrap();
+
+      create_alignment_band(
+        &seed_matches,
+        qry_seq.len() as isize,
+        ref_seq.len() as isize,
+        params.terminal_bandwidth as isize,
+        params.excess_bandwidth as isize,
+        minimal_bandwidth,
+      );
     });
   });
   group.finish();
diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs
index 243cb0f78..37e97bfa2 100644
--- a/packages_rs/nextclade/src/align/align.rs
+++ b/packages_rs/nextclade/src/align/align.rs
@@ -1,17 +1,17 @@
 use crate::align::backtrace::{backtrace, AlignmentOutput};
-use crate::align::band_2d::simple_stripes;
 use crate::align::band_2d::Stripe;
+use crate::align::band_2d::{full_matrix, simple_stripes};
 use crate::align::params::AlignPairwiseParams;
 use crate::align::score_matrix::{score_matrix, ScoreMatrixResult};
-use crate::align::seed_alignment::seed_alignment;
-use crate::align::seed_match2::CodonSpacedIndex;
+use crate::align::seed_alignment::create_alignment_band;
+use crate::align::seed_match2::{get_seed_matches_maybe_reverse_complement, CodonSpacedIndex, SeedMatchesResult};
 use crate::alphabet::aa::Aa;
 use crate::alphabet::letter::Letter;
 use crate::alphabet::nuc::Nuc;
 use crate::make_error;
-use crate::translate::complement::reverse_complement_in_place;
-use eyre::Report;
-use log::{info, trace, warn};
+use eyre::{Report, WrapErr};
+use log::{info, trace};
+use std::cmp::max;
 
 fn align_pairwise<T: Letter<T>>(
   qry_seq: &[T],
@@ -22,8 +22,6 @@ fn align_pairwise<T: Letter<T>>(
 ) -> AlignmentOutput<T> {
   trace!("Align pairwise: started. Params: {params:?}");
 
-  let max_indel = params.max_indel;
-
   let ScoreMatrixResult { scores, paths } = score_matrix(qry_seq, ref_seq, gap_open_close, stripes, params);
 
   backtrace(qry_seq, ref_seq, &scores, &paths)
@@ -39,32 +37,86 @@ pub fn align_nuc(
   gap_open_close: &[i32],
   params: &AlignPairwiseParams,
 ) -> Result<AlignmentOutput<Nuc>, Report> {
-  let qry_len: usize = qry_seq.len();
-  let min_len: usize = params.min_length;
+  let qry_len = qry_seq.len();
+  let ref_len = ref_seq.len();
+  let min_len = params.min_length;
   if qry_len < min_len {
     return make_error!(
       "Unable to align: sequence is too short. Details: sequence length: {qry_len}, min length allowed: {min_len}. This is likely due to a low quality of the provided sequence, or due to using incorrect reference sequence."
     );
   }
 
-  #[allow(clippy::map_err_ignore)]
-  match seed_alignment(qry_seq, ref_seq, seed_index, params) {
-    Ok(stripes) => Ok(align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)),
-    Err(report) => {
-      if params.retry_reverse_complement {
-        info!("When processing sequence #{index} '{seq_name}': Seed matching failed. Retrying reverse complement");
-        let mut qry_seq = qry_seq.to_owned();
-        reverse_complement_in_place(&mut qry_seq);
-        let stripes = seed_alignment(&qry_seq, ref_seq, seed_index, params).map_err(|_| report)?;
-        let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
-        result.is_reverse_complement = true;
-        warn!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to the fasta header in the nucleotide alignment.");
-        Ok(result)
-      } else {
-        Err(report)
-      }
+  if ref_len + qry_len < (10 * params.seed_length) {
+    // for very short sequences, use full square
+    let stripes = full_matrix(ref_len, qry_len);
+    trace!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band construction: short sequences, using full matrix");
+    return Ok(align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes));
+  }
+
+  // otherwise, determine seed matches roughly regularly spaced along the query sequence
+  let SeedMatchesResult {
+    qry_seq,
+    seed_matches,
+    is_reverse_complement,
+  } = get_seed_matches_maybe_reverse_complement(qry_seq, ref_seq, seed_index, params)
+    .wrap_err("When calculating seed matches")?;
+
+  let mut terminal_bandwidth = params.terminal_bandwidth as isize;
+  let mut excess_bandwidth = params.excess_bandwidth as isize;
+  let mut minimal_bandwidth = max(1, params.allowed_mismatches as isize);
+  let max_band_area = params.max_band_area;
+  let mut attempt = 0;
+
+  let (mut stripes, mut band_area) = create_alignment_band(
+    &seed_matches,
+    qry_len as isize,
+    ref_len as isize,
+    terminal_bandwidth,
+    excess_bandwidth,
+    minimal_bandwidth,
+  );
+  if band_area > max_band_area {
+    return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json");
+  }
+
+  let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
+
+  while alignment.hit_boundary && attempt < params.max_alignment_attempts {
+    info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt+1, alignment.alignment_score);
+    // double bandwidth parameters or increase to one if 0
+    terminal_bandwidth = max(2 * terminal_bandwidth, 1);
+    excess_bandwidth = max(2 * excess_bandwidth, 1);
+    minimal_bandwidth = max(2 * minimal_bandwidth, 1);
+    attempt += 1;
+    // make new band
+    (stripes, band_area) = create_alignment_band(
+      &seed_matches,
+      qry_len as isize,
+      ref_len as isize,
+      terminal_bandwidth,
+      excess_bandwidth,
+      minimal_bandwidth,
+    );
+    // discard stripes and break to return previous alignment
+    if band_area > max_band_area {
+      break;
+    }
+    // realign
+    alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
+  }
+  // report success/failure of broadening of band width
+  if alignment.hit_boundary {
+    info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Returning last attempt with score: {}", alignment.alignment_score);
+    if band_area > max_band_area {
+      info!(
+        "When processing sequence #{index} '{seq_name}': final band area {band_area} exceeded the cutoff {max_band_area}"
+      );
     }
+  } else if attempt > 0 {
+    info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score);
   }
+  alignment.is_reverse_complement = is_reverse_complement;
+  Ok(alignment)
 }
 
 /// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels
diff --git a/packages_rs/nextclade/src/align/backtrace.rs b/packages_rs/nextclade/src/align/backtrace.rs
index 876f313e5..f883ffb3f 100644
--- a/packages_rs/nextclade/src/align/backtrace.rs
+++ b/packages_rs/nextclade/src/align/backtrace.rs
@@ -1,7 +1,8 @@
-use crate::align::band_2d::Band2d;
-use crate::align::score_matrix::{MATCH, QRY_GAP_EXTEND, QRY_GAP_MATRIX, REF_GAP_EXTEND, REF_GAP_MATRIX};
+use crate::align::band_2d::{Band2d, Stripe};
+use crate::align::score_matrix::{BOUNDARY, MATCH, QRY_GAP_EXTEND, QRY_GAP_MATRIX, REF_GAP_EXTEND, REF_GAP_MATRIX};
 use crate::alphabet::letter::Letter;
 use crate::utils::vec2d::Vec2d;
+use log::warn;
 use serde::{Deserialize, Serialize};
 use std::cmp;
 
@@ -15,6 +16,7 @@ pub struct AlignmentOutput<T> {
   pub ref_seq: Vec<T>,
   pub alignment_score: i32,
   pub is_reverse_complement: bool,
+  pub hit_boundary: bool,
 }
 
 pub fn backtrace<T: Letter<T>>(
@@ -39,10 +41,13 @@ pub fn backtrace<T: Letter<T>>(
 
   let mut origin: i8;
   let mut current_matrix = 0;
-
+  let mut hit_boundary = false;
   // Do backtrace in the aligned region
   while r_pos > 0 || q_pos > 0 {
     origin = paths[(r_pos, q_pos)];
+    if (origin & BOUNDARY) > 0 {
+      hit_boundary = true;
+    }
 
     if (origin & MATCH) != 0 && (current_matrix == 0) {
       // Match -- decrement both strands and add match to alignment
@@ -92,6 +97,7 @@ pub fn backtrace<T: Letter<T>>(
     ref_seq: aln_ref,
     alignment_score: scores[(num_rows - 1, num_cols - 1)],
     is_reverse_complement: false,
+    hit_boundary,
   }
 }
 
@@ -165,6 +171,7 @@ mod tests {
       ref_seq: to_nuc_seq("ACGCTCGCT")?,
       alignment_score: 18,
       is_reverse_complement: false,
+      hit_boundary: false,
     };
 
     let output = backtrace(&qry_seq, &ref_seq, &scores, &paths);
diff --git a/packages_rs/nextclade/src/align/params.rs b/packages_rs/nextclade/src/align/params.rs
index 72a42ece5..f08cba7d6 100644
--- a/packages_rs/nextclade/src/align/params.rs
+++ b/packages_rs/nextclade/src/align/params.rs
@@ -16,6 +16,7 @@ pub enum GapAlignmentSide {
 #[allow(clippy::struct_excessive_bools)]
 #[optfield(pub AlignPairwiseParamsOptional, attrs, doc, field_attrs, field_doc, merge_fn = pub)]
 #[derive(Parser, Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(rename_all = "camelCase")]
 pub struct AlignPairwiseParams {
   /// Minimum length of nucleotide sequence to consider for alignment.
   ///
@@ -47,6 +48,10 @@ pub struct AlignPairwiseParams {
   #[clap(long)]
   pub score_match: i32,
 
+  /// Maximum area of the band in the alignment matrix. Alignments with large bands are slow to compute and require substantial memory. Alignment of sequences requiring bands with area larger than this value, will not be attempted and a warning will be emitted.
+  #[clap(long)]
+  pub max_band_area: usize,
+
   /// Maximum length of insertions or deletions allowed to proceed with alignment. Alignments with long indels are slow to compute and require substantial memory in the current implementation. Alignment of sequences with indels longer that this value, will not be attempted and a warning will be emitted.
   #[clap(long)]
   pub max_indel: usize,
@@ -127,6 +132,10 @@ pub struct AlignPairwiseParams {
   /// to proceed with the banded alignment.
   #[clap(long)]
   pub min_seed_cover: f64,
+
+  /// Number of times Nextclade will retry alignment with more relaxed parameters if alignment band boundaries are hit
+  #[clap(long)]
+  pub max_alignment_attempts: usize,
 }
 
 impl Default for AlignPairwiseParams {
@@ -139,12 +148,13 @@ impl Default for AlignPairwiseParams {
       penalty_gap_open_out_of_frame: 8,
       penalty_mismatch: 1,
       score_match: 3,
-      max_indel: 400,        // to be replaced
-      seed_length: 21,       // obsolete
-      min_seeds: 10,         // obsolete
-      min_match_rate: 0.3,   // obsolete
-      seed_spacing: 100,     // obsolete
-      mismatches_allowed: 3, // obsolete
+      max_band_area: 500_000_000, // requires around 500MB for paths, 2GB for the scores
+      max_indel: 400,             // obsolete
+      seed_length: 21,            // obsolete
+      min_seeds: 10,              // obsolete
+      min_match_rate: 0.3,        // obsolete
+      seed_spacing: 100,          // obsolete
+      mismatches_allowed: 3,      // obsolete
       retry_reverse_complement: false,
       no_translate_past_stop: false,
       left_terminal_gaps_free: true,
@@ -158,6 +168,7 @@ impl Default for AlignPairwiseParams {
       min_match_length: 40,  // Experimentally determined, to keep off-target matches reasonably low
       allowed_mismatches: 8, // Ns count as mismatches
       window_size: 30,
+      max_alignment_attempts: 3,
     }
   }
 }
diff --git a/packages_rs/nextclade/src/align/score_matrix.rs b/packages_rs/nextclade/src/align/score_matrix.rs
index e284a99fa..2823b6aec 100644
--- a/packages_rs/nextclade/src/align/score_matrix.rs
+++ b/packages_rs/nextclade/src/align/score_matrix.rs
@@ -11,6 +11,7 @@ pub const QRY_GAP_MATRIX: i8 = 1 << 2;
 // these are the override flags for gap extension
 pub const REF_GAP_EXTEND: i8 = 1 << 3;
 pub const QRY_GAP_EXTEND: i8 = 1 << 4;
+pub const BOUNDARY: i8 = 1 << 5;
 
 const NO_ALIGN: i32 = -1_000_000_000; //very negative to be able to process unalignable seqs
 
@@ -111,9 +112,8 @@ pub fn score_matrix<T: Letter<T>>(
         // if the position is within the query sequence
         // no gap -- match case
 
-        // TODO: Double bounds check -> wasteful, make better
+        // If stripes allow a diagonal move to the upper left
         if qpos > stripes[ri - 1].begin && qpos - 1 < stripes[ri - 1].end {
-          // ^ If stripes allow to move up diagonally to upper left
           score = if qry_seq[qpos - 1].is_unknown() || ref_seq[ri - 1].is_unknown() {
             // no need to look-up match score since unknown matches with everything.
             // reduce match score by 1 to de-prioritize matches with unknown states.
@@ -124,6 +124,8 @@ pub fn score_matrix<T: Letter<T>>(
             scores[(ri - 1, qpos - 1)] - params.penalty_mismatch
           };
           origin = MATCH;
+        } else {
+          tmp_path = tmp_path | BOUNDARY; // mark boundary when possible moves are restricted. here: can't move diagonally (left-up)
         }
 
         // check the scores of a reference gap
@@ -154,6 +156,8 @@ pub fn score_matrix<T: Letter<T>>(
             score = tmp_score;
             origin = REF_GAP_MATRIX;
           }
+        } else if ri < n_rows - 1 {
+          tmp_path = tmp_path | BOUNDARY; // mark boundary if no ref gap allowed due to stripes: can't move left
         }
 
         // check the scores of a query gap
@@ -180,8 +184,9 @@ pub fn score_matrix<T: Letter<T>>(
             score = tmp_score;
             origin = QRY_GAP_MATRIX;
           }
-        } else {
+        } else if qpos < n_cols - 1 {
           qry_gaps[qpos] = NO_ALIGN;
+          tmp_path = tmp_path | BOUNDARY; // mark boundary if no qry gap allowed due to stripes: can't move up.
         }
       }
 
@@ -233,29 +238,50 @@ mod tests {
 
   #[rstest]
   fn pads_missing_left(ctx: Context) -> Result<(), Report> {
-    let qry_seq = to_nuc_seq("CTCGCT")?;
-    let ref_seq = to_nuc_seq("ACGCTCGCT")?;
+    let qry_seq = to_nuc_seq("CTCGCTG")?;
+    let ref_seq = to_nuc_seq("ACGCTCGCTG")?;
 
     let band_width = 5;
     let mean_shift = 2;
 
-    let stripes = simple_stripes(mean_shift, band_width, ref_seq.len(), qry_seq.len());
+    let mut stripes = simple_stripes(mean_shift, band_width, ref_seq.len(), qry_seq.len());
+    stripes[2].end = stripes[2].end - 1;
+    stripes[8].begin = stripes[8].begin + 1;
     let result = score_matrix(&qry_seq, &ref_seq, &ctx.gap_open_close, &stripes, &ctx.params);
 
+    #[rustfmt::skip]
     let expected_scores = Band2d::<i32>::with_data(
       &stripes,
       &[
-        0, 0, 0, 0, 0, -1, -1, -1, -1, 0, 3, -2, 2, -2, 2, 0, -1, 2, -3, 5, -1, 1, 0, 3, -2, 5, -1, 8, 2, 0, -1, 6, 0,
-        4, 2, 11, 0, 3, 0, 9, 3, 7, 11, 0, -1, 2, 3, 12, 6, 11, 3, 0, 5, 6, 15, 11, 6, 6, 6, 9, 18,
+         0,  0,  0,  0,
+         0, -1, -1, -1, -1,
+         0,  3, -2,  2, -2,
+         0, -1,  2, -3,  5, -1, -1,
+         0,  3, -2,  5, -1,  8,  2,  2,
+         0, -1,  6,  0,  4,  2, 11,  5,
+         0,  3,  0,  9,  3,  7,  5, 10,
+         0, -1,  2,  3, 12,  6,  6, 10,
+                 0,  5,  6, 15,  9, 10,
+                 0,  3,  6,  9, 18, 12,
+                     3,  6,  9, 12, 21,
       ],
     );
 
+    #[rustfmt::skip]
     let expected_paths = Band2d::<i8>::with_data(
       &stripes,
       &[
-        0, 10, 10, 10, 20, 1, 9, 9, 9, 20, 17, 17, 25, 9, 9, 20, 1, 25, 1, 25, 2, 9, 20, 17, 1, 25, 2, 25, 2, 20, 17,
-        25, 2, 25, 12, 9, 20, 17, 4, 25, 18, 25, 12, 20, 17, 25, 4, 17, 18, 28, 17, 20, 25, 4, 17, 20, 17, 18, 26, 12,
-        17,
+         0, 10, 10, 10,
+        20,  1,  9,  9, 41,
+        20, 17, 17, 25,  9,
+        20,  1, 25,  1, 25, 34, 42,
+        20, 17,  1, 25,  2,  9,  2, 10,
+        20, 17, 25,  2, 25, 12,  9, 2,
+        20, 17,  4, 25, 18, 25, 12, 9,
+        20, 17, 25,  4, 17, 18, 25, 12,
+                52, 17,  4, 17, 18, 28,
+                52, 20, 20,  4, 17, 18,
+                    20, 17, 20,  4,  1
       ],
     );
 
diff --git a/packages_rs/nextclade/src/align/seed_alignment.rs b/packages_rs/nextclade/src/align/seed_alignment.rs
index c15383aca..5c8ee9e61 100644
--- a/packages_rs/nextclade/src/align/seed_alignment.rs
+++ b/packages_rs/nextclade/src/align/seed_alignment.rs
@@ -136,36 +136,6 @@ pub fn get_seed_matches<L: Letter<L>>(
   (seed_matches, n_seeds)
 }
 
-/// Determine rough positioning of qry to reference sequence by approximate seed matching
-/// Returns vector of stripes, that is a band within which the alignment is expected to lie
-pub fn seed_alignment(
-  qry_seq: &[Nuc],
-  ref_seq: &[Nuc],
-  seed_index: &CodonSpacedIndex,
-  params: &AlignPairwiseParams,
-) -> Result<Vec<Stripe>, Report> {
-  let qry_len = qry_seq.len();
-  let ref_len = ref_seq.len();
-
-  if ref_len + qry_len < (10 * params.seed_length) {
-    // for very short sequences, use full square
-    let stripes = full_matrix(ref_len, qry_len);
-    trace!("Band construction: Short qry&ref sequence (< 5*seed_length), thus using full matrix");
-    Ok(stripes)
-  } else {
-    // otherwise, determine seed matches roughly regularly spaced along the query sequence
-    let seed_matches = get_seed_matches2(qry_seq, ref_seq, seed_index, params)?;
-    create_stripes(
-      &seed_matches,
-      qry_len as isize,
-      ref_len as isize,
-      params.terminal_bandwidth as isize,
-      params.excess_bandwidth as isize,
-      params.allowed_mismatches as isize,
-    )
-  }
-}
-
 fn abs_shift(seed1: &SeedMatch2, seed2: &SeedMatch2) -> isize {
   abs(seed2.offset - seed1.offset)
 }
@@ -238,14 +208,14 @@ fn extend_and_rewind(
 
 /// Takes in seed matches and returns a vector of stripes
 /// Stripes define the query sequence range for each reference position
-pub fn create_stripes(
+pub fn create_alignment_band(
   chain: &[SeedMatch2],
   qry_len: isize,
   ref_len: isize,
   terminal_bandwidth: isize,
   excess_bandwidth: isize,
   minimal_bandwidth: isize,
-) -> Result<Vec<Stripe>, Report> {
+) -> (Vec<Stripe>, usize) {
   // This function steps through the chained seeds and determines and appropriate band
   // defined via stripes in query coordinates. These bands will later be chopped to reachable ranges
 
@@ -256,7 +226,6 @@ pub fn create_stripes(
   // post: deal with the terminal trapezoid and allow of terminal bandwidth
 
   let mut bands = Vec::<TrapezoidDirectParams>::with_capacity(2 * chain.len() + 2);
-
   // make initial trapezoid starting at 0 and extending into match by terminal_bandwidth
   let mut current_seed = &chain[0];
   let mut look_back_length = terminal_bandwidth;
@@ -294,8 +263,8 @@ pub fn create_stripes(
     current_band = TrapezoidDirectParams {
       ref_start: current_ref_end,
       ref_end: next_seed.ref_pos as isize + look_forward_length,
-      min_offset: mean_offset - look_back_length - excess_bandwidth,
-      max_offset: mean_offset + look_back_length + excess_bandwidth,
+      min_offset: mean_offset - max(look_back_length, excess_bandwidth),
+      max_offset: mean_offset + max(look_back_length, excess_bandwidth),
     };
     current_seed = next_seed;
   }
@@ -336,9 +305,7 @@ pub fn create_stripes(
   // write_stripes_to_file(&stripes, "stripes.csv");
 
   // trim stripes to reachable regions
-  let regularized_stripes = regularize_stripes(stripes, qry_len as usize);
-
-  Ok(regularized_stripes)
+  regularize_stripes(stripes, qry_len as usize)
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -351,7 +318,7 @@ struct TrapezoidDirectParams {
 
 /// Chop off unreachable parts of the stripes.
 /// Overhanging parts are pruned
-fn regularize_stripes(mut stripes: Vec<Stripe>, qry_len: usize) -> Vec<Stripe> {
+fn regularize_stripes(mut stripes: Vec<Stripe>, qry_len: usize) -> (Vec<Stripe>, usize) {
   // assure stripe begin are non-decreasing -- such states would be unreachable in the alignment
   let stripes_len = stripes.len();
   stripes[0].begin = 0;
@@ -361,11 +328,13 @@ fn regularize_stripes(mut stripes: Vec<Stripe>, qry_len: usize) -> Vec<Stripe> {
 
   // analogously, assure that strip ends are non-decreasing. this needs to be done in reverse.
   stripes[stripes_len - 1].end = qry_len + 1;
+  let mut band_area = stripes[stripes_len - 1].end - stripes[stripes_len - 1].begin;
   for i in (0..(stripes_len - 1)).rev() {
     stripes[i].end = clamp(stripes[i].end, stripes[i].begin + 1, stripes[i + 1].end);
+    band_area += stripes[i].end - stripes[i].begin;
   }
 
-  stripes
+  (stripes, band_area)
 }
 
 fn trace_stripe_stats(stripes: &[Stripe]) {
@@ -445,7 +414,7 @@ mod tests {
     let qry_len = 30;
     let ref_len = 40;
 
-    let result = create_stripes(
+    let result = create_alignment_band(
       &seed_matches,
       qry_len,
       ref_len,
diff --git a/packages_rs/nextclade/src/align/seed_match2.rs b/packages_rs/nextclade/src/align/seed_match2.rs
index 360a78922..f9ad964ae 100644
--- a/packages_rs/nextclade/src/align/seed_match2.rs
+++ b/packages_rs/nextclade/src/align/seed_match2.rs
@@ -3,6 +3,7 @@ use crate::align::seed_alignment::write_matches_to_file;
 use crate::alphabet::letter::Letter;
 use crate::alphabet::nuc::{from_nuc_seq, Nuc};
 use crate::make_error;
+use crate::translate::complement::reverse_complement_in_place;
 use bio::alphabets;
 use bio::data_structures::bwt::{bwt, less, Less, Occ, BWT};
 use bio::data_structures::fmindex::{BackwardSearchResult, FMIndex, FMIndexable};
@@ -11,6 +12,7 @@ use eyre::Report;
 use gcollections::ops::{Bounded, Intersection, IsEmpty, Union};
 use interval::interval_set::{IntervalSet, ToIntervalSet};
 use itertools::Itertools;
+use std::borrow::Cow;
 use std::cmp::{max, min};
 use std::collections::{BTreeMap, VecDeque};
 
@@ -119,7 +121,7 @@ impl SeedMatch2 {
     // counter to keep track of total number of mismatches in window
     let mut forward_mismatches = 0;
 
-    while forward_mismatches < config.allowed_mismatches && length < max_length {
+    while forward_mismatches <= config.allowed_mismatches && length < max_length {
       // remove first position in queue, decrement mismatch counter in case of mismatch
       if mismatch_queue.pop_front().unwrap() {
         forward_mismatches = forward_mismatches.saturating_sub(1);
@@ -157,7 +159,7 @@ impl SeedMatch2 {
     // repeat in other direction
     mismatch_queue = VecDeque::from(vec![false; config.window_size]);
     let mut backward_mismatches = 0;
-    while backward_mismatches < config.allowed_mismatches && ref_pos > 0 && qry_pos > 0 {
+    while backward_mismatches <= config.allowed_mismatches && ref_pos > 0 && qry_pos > 0 {
       if mismatch_queue.pop_front().unwrap() {
         backward_mismatches = backward_mismatches.saturating_sub(1);
       }
@@ -496,6 +498,42 @@ pub fn get_seed_matches2(
   Ok(seed_matches)
 }
 
+pub struct SeedMatchesResult<'a> {
+  pub qry_seq: Cow<'a, [Nuc]>,
+  pub seed_matches: Vec<SeedMatch2>,
+  pub is_reverse_complement: bool,
+}
+
+#[allow(clippy::map_err_ignore)]
+pub fn get_seed_matches_maybe_reverse_complement<'a>(
+  qry_seq: &'a [Nuc],
+  ref_seq: &[Nuc],
+  seed_index: &CodonSpacedIndex,
+  params: &AlignPairwiseParams,
+) -> Result<SeedMatchesResult<'a>, Report> {
+  match get_seed_matches2(qry_seq, ref_seq, seed_index, params) {
+    Ok(seed_matches) => Ok(SeedMatchesResult {
+      qry_seq: Cow::Borrowed(qry_seq),
+      seed_matches,
+      is_reverse_complement: false,
+    }),
+    Err(report) => {
+      if params.retry_reverse_complement {
+        let mut rev_complement = qry_seq.to_owned();
+        reverse_complement_in_place(&mut rev_complement);
+        let seed_matches = get_seed_matches2(&rev_complement, ref_seq, seed_index, params).map_err(|_| report)?;
+        Ok(SeedMatchesResult {
+          qry_seq: Cow::Owned(rev_complement),
+          seed_matches,
+          is_reverse_complement: true,
+        })
+      } else {
+        Err(report)
+      }
+    }
+  }
+}
+
 #[cfg(test)]
 mod tests {
   use super::*;
diff --git a/packages_rs/nextclade/src/alphabet/aa.rs b/packages_rs/nextclade/src/alphabet/aa.rs
index 32780767c..f4a03ce21 100644
--- a/packages_rs/nextclade/src/alphabet/aa.rs
+++ b/packages_rs/nextclade/src/alphabet/aa.rs
@@ -3,6 +3,7 @@ use crate::alphabet::letter::{Letter, ScoreMatrixLookup};
 use crate::make_error;
 use eyre::{eyre, Report, WrapErr};
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::fmt::{Display, Formatter};
 
 #[repr(u8)]
 #[derive(
@@ -51,18 +52,18 @@ impl Aa {
   }
 }
 
-impl ToString for Aa {
-  fn to_string(&self) -> String {
-    String::from(from_aa(*self))
-  }
-}
-
 impl ScoreMatrixLookup<Aa> for Aa {
   fn lookup_match_score(x: Aa, y: Aa) -> i32 {
     lookup_aa_scoring_matrix(x, y)
   }
 }
 
+impl Display for Aa {
+  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+    write!(f, "{}", from_aa(*self))
+  }
+}
+
 impl Letter<Aa> for Aa {
   const GAP: Aa = Aa::Gap;
 
diff --git a/packages_rs/nextclade/src/alphabet/letter.rs b/packages_rs/nextclade/src/alphabet/letter.rs
index 3bc29356e..a5623e2ea 100644
--- a/packages_rs/nextclade/src/alphabet/letter.rs
+++ b/packages_rs/nextclade/src/alphabet/letter.rs
@@ -1,6 +1,7 @@
 use color_eyre::{Section, SectionExt};
 use eyre::{Report, WrapErr};
 use serde::{Deserialize, Deserializer, Serializer};
+use std::fmt::Display;
 
 /// Allows to lookup scores for nucleotides and amino acids in a generic way
 pub trait ScoreMatrixLookup<T> {
@@ -8,7 +9,7 @@ pub trait ScoreMatrixLookup<T> {
 }
 
 /// Generic representation of a character defining nucleotide or amino acid
-pub trait Letter<L>: Copy + Eq + Ord + ScoreMatrixLookup<L> {
+pub trait Letter<L>: Copy + Display + Eq + Ord + ScoreMatrixLookup<L> {
   const GAP: L;
 
   fn is_gap(&self) -> bool;
diff --git a/packages_rs/nextclade/src/alphabet/nuc.rs b/packages_rs/nextclade/src/alphabet/nuc.rs
index f4812d19f..cf7cebd38 100644
--- a/packages_rs/nextclade/src/alphabet/nuc.rs
+++ b/packages_rs/nextclade/src/alphabet/nuc.rs
@@ -3,6 +3,7 @@ use crate::alphabet::letter::{Letter, ScoreMatrixLookup};
 use crate::make_error;
 use eyre::{eyre, Report, WrapErr};
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::fmt::{Display, Formatter};
 
 #[repr(u8)]
 #[derive(
@@ -30,12 +31,6 @@ pub enum Nuc {
   Gap,
 }
 
-impl ToString for Nuc {
-  fn to_string(&self) -> String {
-    String::from(from_nuc(*self))
-  }
-}
-
 impl Nuc {
   #[inline]
   pub const fn is_acgt(self) -> bool {
@@ -54,6 +49,12 @@ impl ScoreMatrixLookup<Nuc> for Nuc {
   }
 }
 
+impl Display for Nuc {
+  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+    write!(f, "{}", from_nuc(*self))
+  }
+}
+
 impl Letter<Nuc> for Nuc {
   const GAP: Nuc = Nuc::Gap;
 
diff --git a/packages_rs/nextclade/src/analyze/divergence.rs b/packages_rs/nextclade/src/analyze/divergence.rs
index 381a509ab..6662e410c 100644
--- a/packages_rs/nextclade/src/analyze/divergence.rs
+++ b/packages_rs/nextclade/src/analyze/divergence.rs
@@ -1,20 +1,59 @@
 use crate::analyze::nuc_sub::NucSub;
+use crate::coord::range::NucRefGlobalRange;
+use crate::tree::params::TreeBuilderParams;
 use crate::tree::tree::DivergenceUnits;
 
-/// Calculate number of nuc muts, only considering ACGT characters
-pub fn count_nuc_muts(nuc_muts: &[NucSub]) -> usize {
-  nuc_muts
+pub struct NucMutsCounted<'a> {
+  muts: Vec<&'a NucSub>,
+  masked_muts: Vec<&'a NucSub>,
+  other_muts: Vec<&'a NucSub>,
+  n_muts: usize,
+  n_masked_muts: usize,
+  n_other_muts: usize,
+}
+
+pub fn count_nuc_muts<'a>(nuc_muts: &'a [NucSub], masked_ranges: &[NucRefGlobalRange]) -> NucMutsCounted<'a> {
+  // Split away non_acgt mutations
+  let (nuc_muts, other_muts): (Vec<_>, Vec<_>) = nuc_muts
+    .iter()
+    .partition(|m| m.ref_nuc.is_acgt() && m.qry_nuc.is_acgt());
+
+  // Split away masked mutations
+  let (masked_muts, muts): (Vec<_>, Vec<_>) = nuc_muts
     .iter()
-    .filter(|m| m.ref_nuc.is_acgt() && m.qry_nuc.is_acgt())
-    .count()
+    .partition(|m| masked_ranges.iter().any(|range| range.contains(m.pos)));
+
+  let n_muts = muts.len();
+  let n_masked_muts = masked_muts.len();
+  let n_other_muts = other_muts.len();
+
+  NucMutsCounted {
+    muts,
+    masked_muts,
+    other_muts,
+    n_muts,
+    n_masked_muts,
+    n_other_muts,
+  }
 }
 
 pub fn calculate_branch_length(
-  private_mutations: &[NucSub],
+  nuc_muts: &[NucSub],
+  masked_ranges: &[NucRefGlobalRange],
   divergence_units: DivergenceUnits,
   ref_seq_len: usize,
 ) -> f64 {
-  let mut this_div = count_nuc_muts(private_mutations) as f64;
+  let NucMutsCounted {
+    n_muts,
+    n_masked_muts,
+    n_other_muts,
+    ..
+  } = count_nuc_muts(nuc_muts, masked_ranges);
+
+  let mut this_div = n_muts as f64;
+  if n_muts == 0 && n_masked_muts + n_other_muts > 0 {
+    this_div += 0.06;
+  }
 
   // If divergence is measured per site, divide by the length of reference sequence.
   // The unit of measurement is deduced from what's already is used in the reference tree nodes.
@@ -24,3 +63,26 @@ pub fn calculate_branch_length(
 
   this_div
 }
+
+/// Calculate nuc mut score
+pub fn score_nuc_muts(nuc_muts: &[NucSub], masked_ranges: &[NucRefGlobalRange], params: &TreeBuilderParams) -> f64 {
+  let NucMutsCounted {
+    n_muts,
+    n_masked_muts,
+    n_other_muts,
+    ..
+  } = count_nuc_muts(nuc_muts, masked_ranges);
+  let mut score = n_muts as f64;
+  // modify the score by sub-integer amounts for masked and other mutations. this effectively means
+  // scoring is first by n_muts, then by masked_muts, then by other_muts
+  if n_masked_muts > 0 {
+    // independent of their number, masked nucleotides increase the score by 0.5
+    score += 0.5;
+  }
+  if n_other_muts > 0 {
+    // other mutations (mostly to and from gap) by 0.1
+    score += 0.1;
+  }
+
+  score
+}
diff --git a/packages_rs/nextclade/src/analyze/find_private_nuc_mutations.rs b/packages_rs/nextclade/src/analyze/find_private_nuc_mutations.rs
index f43bebbbb..ffcfa8b5d 100644
--- a/packages_rs/nextclade/src/analyze/find_private_nuc_mutations.rs
+++ b/packages_rs/nextclade/src/analyze/find_private_nuc_mutations.rs
@@ -1,7 +1,6 @@
 use crate::alphabet::letter::Letter;
 use crate::alphabet::nuc::Nuc;
 use crate::analyze::aa_sub::AaSub;
-use crate::analyze::divergence::count_nuc_muts;
 use crate::analyze::is_sequenced::{is_nuc_non_acgtn, is_nuc_sequenced};
 use crate::analyze::letter_ranges::NucRange;
 use crate::analyze::nuc_del::{NucDel, NucDelRange};
@@ -34,7 +33,7 @@ impl BranchMutations {
   }
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Default, Serialize, Deserialize, schemars::JsonSchema, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct PrivateNucMutations {
   /// All private substitution mutations
@@ -115,7 +114,7 @@ pub fn find_private_nuc_mutations(
 
   let (labeled_substitutions, unlabeled_substitutions) = label_private_mutations(
     &non_reversion_substitutions,
-    &virus_properties.nuc_mut_label_maps.substitution_label_map,
+    &virus_properties.mut_labels.nuc_mut_label_map,
   );
 
   let mut private_substitutions = concat_to_vec(&reversion_substitutions, &non_reversion_substitutions);
diff --git a/packages_rs/nextclade/src/analyze/mod.rs b/packages_rs/nextclade/src/analyze/mod.rs
index b4833b2e6..71ad47350 100644
--- a/packages_rs/nextclade/src/analyze/mod.rs
+++ b/packages_rs/nextclade/src/analyze/mod.rs
@@ -15,6 +15,5 @@ pub mod nuc_changes;
 pub mod nuc_del;
 pub mod nuc_sub;
 pub mod pcr_primer_changes;
-pub mod pcr_primers;
 pub mod phenotype;
 pub mod virus_properties;
diff --git a/packages_rs/nextclade/src/analyze/pcr_primer_changes.rs b/packages_rs/nextclade/src/analyze/pcr_primer_changes.rs
index 73af88323..cd0aaef20 100644
--- a/packages_rs/nextclade/src/analyze/pcr_primer_changes.rs
+++ b/packages_rs/nextclade/src/analyze/pcr_primer_changes.rs
@@ -1,9 +1,27 @@
-use crate::alphabet::nuc::is_nuc_match;
+use crate::alphabet::nuc::{is_nuc_match, Nuc};
 use crate::analyze::nuc_sub::NucSub;
-use crate::analyze::pcr_primers::PcrPrimer;
+use crate::coord::range::NucRefGlobalRange;
+use crate::gene::genotype::Genotype;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 
+#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct PcrPrimer {
+  pub name: String,
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub description: Option<String>,
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub source: Option<String>,
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub target: Option<String>,
+  pub ref_oligonuc: String,
+  pub primer_oligonuc: String,
+  pub range: NucRefGlobalRange,
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub non_acgts: Vec<Genotype<Nuc>>,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct PcrPrimerChange {
diff --git a/packages_rs/nextclade/src/analyze/pcr_primers.rs b/packages_rs/nextclade/src/analyze/pcr_primers.rs
deleted file mode 100644
index 0fff92743..000000000
--- a/packages_rs/nextclade/src/analyze/pcr_primers.rs
+++ /dev/null
@@ -1,158 +0,0 @@
-use crate::alphabet::nuc::{from_nuc_seq, to_nuc_seq, Nuc};
-use crate::coord::range::NucRefGlobalRange;
-use crate::gene::genotype::Genotype;
-use crate::io::csv::parse_csv;
-use crate::io::fs::read_file_to_string;
-use crate::make_error;
-use crate::translate::complement::reverse_complement_in_place;
-use eyre::{Report, WrapErr};
-use itertools::Itertools;
-use log::warn;
-use regex::Regex;
-use serde::{Deserialize, Serialize};
-use std::path::Path;
-
-#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct PcrPrimerCsvRow {
-  #[serde(rename = "Country (Institute)")]
-  pub source: String,
-
-  #[serde(rename = "Target")]
-  pub target: String,
-
-  #[serde(rename = "Oligonucleotide")]
-  pub name: String,
-
-  #[serde(rename = "Sequence")]
-  pub primer_oligonuc: String,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct PcrPrimer {
-  pub source: String,
-  pub target: String,
-  pub name: String,
-  pub root_oligonuc: String,
-  pub primer_oligonuc: String,
-  pub range: NucRefGlobalRange,
-  #[serde(rename = "nonACGTs")]
-  pub non_acgts: Vec<Genotype<Nuc>>,
-}
-
-impl PcrPrimer {
-  pub fn from_str(s: &str, ref_seq_str: &str) -> Result<Vec<Self>, Report> {
-    let raw: Vec<PcrPrimerCsvRow> = parse_csv(s)?;
-    raw
-      .into_iter()
-      .map(|raw_primer| convert_pcr_primer(raw_primer, ref_seq_str))
-      .collect::<Result<Vec<Self>, Report>>()
-  }
-
-  pub fn from_path(filepath: impl AsRef<Path>, ref_seq_str: &str) -> Result<Vec<Self>, Report> {
-    let filepath = filepath.as_ref();
-
-    let data =
-      read_file_to_string(filepath).wrap_err_with(|| format!("When reading PCR primers file {filepath:#?}"))?;
-
-    Self::from_str(&data, ref_seq_str).wrap_err_with(|| format!("When parsing PCR primers file {filepath:#?}"))
-  }
-}
-
-pub fn convert_pcr_primer(raw: PcrPrimerCsvRow, ref_seq_str: &str) -> Result<PcrPrimer, Report> {
-  let PcrPrimerCsvRow {
-    source,
-    target,
-    name,
-    primer_oligonuc,
-  } = raw;
-
-  let mut primer_oligonuc = to_nuc_seq(&primer_oligonuc)?;
-
-  // If this is a reverse primer, we need to reverse-complement it before attempting to match with root sequence
-  if name.ends_with("_R") {
-    reverse_complement_in_place(&mut primer_oligonuc);
-  }
-
-  let mut root_oligonuc = find_primer_in_ref_seq(&primer_oligonuc, ref_seq_str);
-  if root_oligonuc.is_none() {
-    // If nothing found, reverse-complement the primer and retry search
-    reverse_complement_in_place(&mut primer_oligonuc);
-    root_oligonuc = find_primer_in_ref_seq(&primer_oligonuc, ref_seq_str);
-  }
-
-  match root_oligonuc {
-    None => {
-      make_error!(
-        "PCR primer not found in reference sequence: name: '{}', source: '{}', oligonuc: '{}'. \
-        This might mean that the list of primers is not compatible with the root sequence used.",
-        name,
-        source,
-        from_nuc_seq(&primer_oligonuc)
-      )
-    }
-    Some((begin, root_oligonuc)) => {
-      let range = NucRefGlobalRange::from_usize(begin, begin + root_oligonuc.len());
-
-      let non_acgts = find_non_acgt(&primer_oligonuc);
-
-      Ok(PcrPrimer {
-        source,
-        target,
-        name,
-        root_oligonuc: from_nuc_seq(&root_oligonuc),
-        primer_oligonuc: from_nuc_seq(&primer_oligonuc),
-        range,
-        non_acgts,
-      })
-    }
-  }
-}
-
-/// Finds PCR primer oligonucleotide fragment in reference sequence. Returns position of the begin of the fragment
-/// in the reference sequence and the corresponding fragment of reference sequence.
-pub fn find_primer_in_ref_seq(primer_oligonuc: &[Nuc], ref_seq_str: &str) -> Option<(usize, Vec<Nuc>)> {
-  // Remove all non-ACGTN from the primer
-  let primer_oligonuc_sanitized = from_nuc_seq(primer_oligonuc)
-    .chars()
-    .map(|nuc| if is_acgt_char(nuc) { nuc } else { '.' })
-    .collect::<String>();
-
-  match Regex::new(&primer_oligonuc_sanitized) {
-    Err(report) => {
-      warn!(
-        "When compiling regular expression for PCR primer search: '{}': {}",
-        primer_oligonuc_sanitized,
-        report.to_string()
-      );
-      None
-    }
-    Ok(primer_regex) => {
-      if let Some(captures) = primer_regex.captures(ref_seq_str) {
-        captures
-          .get(0)
-          .map(|capture| (capture.start(), to_nuc_seq(capture.as_str()).unwrap()))
-      } else {
-        None
-      }
-    }
-  }
-}
-
-pub const fn is_acgt_char(c: char) -> bool {
-  matches!(c.to_ascii_uppercase(), 'A' | 'C' | 'G' | 'T')
-}
-
-pub fn find_non_acgt(seq: &[Nuc]) -> Vec<Genotype<Nuc>> {
-  seq
-    .iter()
-    .enumerate()
-    .filter_map(|(pos, nuc)| {
-      (!nuc.is_acgt()).then_some(Genotype {
-        pos: pos.into(),
-        qry: *nuc,
-      })
-    })
-    .collect_vec()
-}
diff --git a/packages_rs/nextclade/src/analyze/virus_properties.rs b/packages_rs/nextclade/src/analyze/virus_properties.rs
index aaf51b794..1cec3211a 100644
--- a/packages_rs/nextclade/src/analyze/virus_properties.rs
+++ b/packages_rs/nextclade/src/analyze/virus_properties.rs
@@ -1,59 +1,96 @@
 use crate::align::params::AlignPairwiseParamsOptional;
 use crate::alphabet::aa::Aa;
-use crate::alphabet::letter::Letter;
 use crate::alphabet::nuc::Nuc;
+use crate::analyze::pcr_primer_changes::PcrPrimer;
 use crate::coord::position::AaRefPosition;
-use crate::coord::range::{AaRefRange, NucRefGlobalRange};
+use crate::coord::range::AaRefRange;
 use crate::gene::genotype::Genotype;
+use crate::io::dataset::{DatasetAttributes, DatasetCompatibility, DatasetFiles, DatasetVersion};
 use crate::io::fs::read_file_to_string;
 use crate::io::json::json_parse;
+use crate::io::schema_version::{SchemaVersion, SchemaVersionParams};
+use crate::qc::qc_config::QcConfig;
+use crate::run::params_general::NextcladeGeneralParamsOptional;
 use crate::tree::params::TreeBuilderParamsOptional;
+use crate::utils::boolean::{bool_false, bool_true};
 use eyre::{Report, WrapErr};
+use semver::Version;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
 use std::path::Path;
 use std::str::FromStr;
 use validator::Validate;
 
-/// Raw JSON version of the `VirusProperties` struct
-#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema, Validate)]
-#[serde(rename_all = "camelCase")]
-struct VirusPropertiesRaw {
-  pub schema_version: String,
-  pub alignment_params: Option<AlignPairwiseParamsOptional>,
-  pub tree_builder_params: Option<TreeBuilderParamsOptional>,
-  pub nuc_mut_label_map: BTreeMap<String, Vec<String>>,
-  pub phenotype_data: Option<Vec<PhenotypeData>>,
-  #[serde(default)]
-  pub aa_motifs: Vec<AaMotifsDesc>,
-  #[serde(default)]
-  pub placement_mask_ranges: Vec<NucRefGlobalRange>, // 0-based, end-exclusive
-}
+const PATHOGEN_JSON_SCHEMA_VERSION_FROM: &str = "3.0.0";
+const PATHOGEN_JSON_SCHEMA_VERSION_TO: &str = "3.0.0";
 
 /// Contains external configuration and data specific for a particular pathogen
 #[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema, Validate)]
 #[serde(rename_all = "camelCase")]
 pub struct VirusProperties {
   pub schema_version: String,
+
+  pub attributes: DatasetAttributes,
+
+  pub files: DatasetFiles,
+
+  #[serde(default = "bool_false")]
+  pub deprecated: bool,
+
+  #[serde(default = "bool_true")]
+  pub enabled: bool,
+
+  #[serde(default = "bool_true")]
+  pub experimental: bool,
+
+  pub default_gene: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub gene_order_preference: Vec<String>,
+
+  #[serde(default)]
+  pub mut_labels: LabelledMutationsConfig,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub primers: Vec<PcrPrimer>,
+
+  pub qc: Option<QcConfig>,
+
+  pub general_params: Option<NextcladeGeneralParamsOptional>,
+
   pub alignment_params: Option<AlignPairwiseParamsOptional>,
+
   pub tree_builder_params: Option<TreeBuilderParamsOptional>,
-  pub nuc_mut_label_maps: MutationLabelMaps<Nuc>,
+
   pub phenotype_data: Option<Vec<PhenotypeData>>,
-  #[serde(default)]
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub aa_motifs: Vec<AaMotifsDesc>,
-  #[serde(default)]
-  pub placement_mask_ranges: Vec<NucRefGlobalRange>, // 0-based, end-exclusive
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub versions: Vec<DatasetVersion>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub version: Option<DatasetVersion>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub compatibility: Option<DatasetCompatibility>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
 /// Associates a genotype (pos, nuc) to a list of labels
 pub type LabelMap<L> = BTreeMap<Genotype<L>, Vec<String>>;
 pub type NucLabelMap = LabelMap<Nuc>;
 
-/// External data that contains labels assigned to many mutations
 #[derive(Debug, Default, Clone, Serialize, Deserialize, schemars::JsonSchema, Validate)]
 #[serde(rename_all = "camelCase")]
-pub struct MutationLabelMaps<L: Letter<L>> {
-  pub substitution_label_map: BTreeMap<Genotype<L>, Vec<String>>,
+pub struct LabelledMutationsConfig {
+  #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
+  pub nuc_mut_label_map: BTreeMap<Genotype<Nuc>, Vec<String>>,
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
 #[derive(Debug, Default, Clone, Serialize, Deserialize, schemars::JsonSchema, Validate)]
@@ -130,7 +167,7 @@ pub struct AaMotifsDesc {
   pub description: String,
   pub motifs: Vec<String>,
 
-  #[serde(default)]
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub include_genes: Vec<CountAaMotifsGeneDesc>,
 }
 
@@ -139,41 +176,35 @@ pub struct AaMotifsDesc {
 pub struct CountAaMotifsGeneDesc {
   pub gene: String,
 
-  #[serde(default)]
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub ranges: Vec<AaRefRange>,
 }
 
-impl FromStr for VirusProperties {
-  type Err = Report;
-
-  fn from_str(s: &str) -> Result<Self, Self::Err> {
-    let raw = json_parse::<VirusPropertiesRaw>(s)?;
-
-    let mut substitution_label_map = NucLabelMap::new();
-    for (mut_str, labels) in raw.nuc_mut_label_map {
-      let genotype = Genotype::<Nuc>::from_str(&mut_str)?;
-      if !genotype.qry.is_gap() {
-        substitution_label_map.insert(genotype, labels);
-      }
-    }
-
-    Ok(Self {
-      schema_version: raw.schema_version,
-      alignment_params: raw.alignment_params,
-      tree_builder_params: raw.tree_builder_params,
-      nuc_mut_label_maps: MutationLabelMaps { substitution_label_map },
-      phenotype_data: raw.phenotype_data,
-      aa_motifs: raw.aa_motifs,
-      placement_mask_ranges: raw.placement_mask_ranges,
-    })
-  }
-}
-
 impl VirusProperties {
   pub fn from_path(filepath: impl AsRef<Path>) -> Result<Self, Report> {
     let filepath = filepath.as_ref();
     let data =
-      read_file_to_string(filepath).wrap_err_with(|| format!("When reading virus properties file {filepath:#?}"))?;
-    Self::from_str(&data).wrap_err_with(|| format!("When parsing virus properties file {filepath:#?}"))
+      read_file_to_string(filepath).wrap_err_with(|| format!("When reading pathogen.json file: {filepath:#?}"))?;
+    Self::from_str(&data)
+  }
+
+  pub fn from_str(s: &impl AsRef<str>) -> Result<Self, Report> {
+    SchemaVersion::check_warn(
+      s,
+      &SchemaVersionParams {
+        name: "pathogen.json",
+        ver_from: Some(PATHOGEN_JSON_SCHEMA_VERSION_FROM),
+        ver_to: Some(PATHOGEN_JSON_SCHEMA_VERSION_TO),
+      },
+    );
+
+    json_parse::<VirusProperties>(s).wrap_err("When parsing pathogen.json file")
+  }
+
+  pub fn is_cli_compatible(&self, current_cli_version: &Version) -> bool {
+    self
+      .compatibility
+      .as_ref()
+      .map_or(true, |compat| compat.is_cli_compatible(current_cli_version))
   }
 }
diff --git a/packages_rs/nextclade/src/features/feature_tree.rs b/packages_rs/nextclade/src/features/feature_tree.rs
index fc5d0a94d..3e65dcbb0 100644
--- a/packages_rs/nextclade/src/features/feature_tree.rs
+++ b/packages_rs/nextclade/src/features/feature_tree.rs
@@ -184,7 +184,7 @@ fn process_gff_records<R: Read>(reader: &mut GffReader<R>) -> Result<Vec<Feature
   validate(&features)?;
 
   if features.is_empty() {
-    return make_error!("Gene map contains no features. This is not allowed.");
+    return make_error!("Genome annotation file contains no features. This is not allowed. Either add features to the file, or remove the file. Please report this to dataset authors.");
   }
 
   process_circular_features(&mut features)?;
@@ -212,7 +212,7 @@ fn process_circular_features(features: &mut [Feature]) -> Result<(), Report> {
       });
 
     if has_circular && landmark.is_none() {
-      return make_error!("Gene map is invalid: There are circular features in the genome, and this requires a landmark feature to be present. However, in genomic feature '{}', the column 'seqid' (column 0) refers to feature '{}', but the feature with such 'ID' attribute is not found. Make sure that the column 'seqid' (column 0) contains an 'ID' of the landmark feature and that this feature exists.", feature.name, feature.seqid)
+      return make_error!("Genome annotation is invalid: There are circular features in the genome, and this requires a landmark feature to be present. However, in genomic feature '{}', the column 'seqid' (column 0) refers to feature '{}', but the feature with such 'ID' attribute is not found. Make sure that the column 'seqid' (column 0) contains an 'ID' of the landmark feature and that this feature exists. Please report this to dataset authors.", feature.name, feature.seqid)
     }
 
     feature.landmark = landmark.map(Landmark::from_feature);
@@ -331,7 +331,7 @@ fn validate(features: &[Feature]) -> Result<(), Report> {
 
   if !errors.is_empty() {
     return make_error!(
-      "Gene map is invalid. The following errors were found:\n\n{}",
+      "Genome annotation is invalid. The following errors were found:\n\n{}\n\nPlease report this to dataset authors.",
       errors.join("\n\n")
     );
   }
diff --git a/packages_rs/nextclade/src/gene/cds.rs b/packages_rs/nextclade/src/gene/cds.rs
index ef7c964e6..847923cab 100644
--- a/packages_rs/nextclade/src/gene/cds.rs
+++ b/packages_rs/nextclade/src/gene/cds.rs
@@ -271,7 +271,7 @@ fn split_circular_cds_segments(segments: &[CdsSegment]) -> Result<Vec<CdsSegment
 fn validate_segment_bounds(segment: &CdsSegment, allow_overflow: bool) -> Result<(), Report> {
   if segment.range.begin > segment.range.end {
     return make_error!(
-      "Gene map is invalid: In genomic feature '{}': Feature start > end: {} > {}",
+      "Genome annotation is invalid: In genomic feature '{}': Feature start > end: {} > {}. Please report this to dataset authors.",
       segment.name,
       segment.range.begin + 1,
       segment.range.end + 1,
@@ -284,7 +284,7 @@ fn validate_segment_bounds(segment: &CdsSegment, allow_overflow: bool) -> Result
 
     if segment.range.begin < landmark_start {
       return make_error!(
-      "Gene map is invalid: In genomic feature '{}': Feature start at position {} is outside of landmark feature bounds: {}..{}",
+      "Genome annotation is invalid: In genomic feature '{}': Feature start at position {} is outside of landmark feature bounds: {}..{}. Please report this to dataset authors.",
       segment.name,
       segment.range.begin + 1,
       landmark_start + 1,
@@ -294,7 +294,7 @@ fn validate_segment_bounds(segment: &CdsSegment, allow_overflow: bool) -> Result
 
     if !allow_overflow && segment.range.end > landmark_end {
       return make_error!(
-      "Gene map is invalid: In genomic feature '{}': Feature end at position {} is outside of landmark feature bounds: {}..{}",
+      "Genome annotation is invalid: In genomic feature '{}': Feature end at position {} is outside of landmark feature bounds: {}..{}. Please report this to dataset authors.",
       segment.name,
       segment.range.end + 1,
       landmark_start + 1,
diff --git a/packages_rs/nextclade/src/gene/gene_map.rs b/packages_rs/nextclade/src/gene/gene_map.rs
index ad43d1d7a..7a8b2f7ec 100644
--- a/packages_rs/nextclade/src/gene/gene_map.rs
+++ b/packages_rs/nextclade/src/gene/gene_map.rs
@@ -19,7 +19,7 @@ use std::collections::BTreeMap;
 use std::fmt::Display;
 use std::path::Path;
 
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+#[derive(Clone, Debug, Default, Deserialize, Serialize, JsonSchema)]
 #[must_use]
 pub struct GeneMap {
   pub genes: BTreeMap<String, Gene>,
@@ -38,7 +38,7 @@ impl GeneMap {
     convert_feature_tree_to_gene_map(feature_tree)
   }
 
-  pub fn from_file<P: AsRef<Path>>(filename: P) -> Result<Self, Report> {
+  pub fn from_path<P: AsRef<Path>>(filename: P) -> Result<Self, Report> {
     let filename = filename.as_ref();
     let mut file = open_file_or_stdin(&Some(filename))?;
     let mut buf = vec![];
@@ -159,52 +159,25 @@ impl GeneMap {
   }
 }
 
-/// Filters gene map according to the list of requested genes.
-///
-/// Here are the possible combinations:
-///
-/// | --genemap  | --genes |                 behavior                   |
-/// |------------|---------|--------------------------------------------|
-/// |     +      |    +    | Take only specified genes                  |
-/// |     +      |         | Take all genes                             |
-/// |            |    +    | Error                                      |
-/// |            |         | Skip translation and codon penalties       |
-pub fn filter_gene_map(gene_map: Option<GeneMap>, genes: &Option<Vec<String>>) -> Result<GeneMap, Report> {
-  match (gene_map, genes) {
-    // Both gene map and list of genes are provided. Retain only requested genes.
-    (Some(gene_map), Some(genes)) => {
-      let gene_map: BTreeMap<String, Gene> = gene_map
-        .into_iter_genes()
-        .filter(|(gene_name, ..)| genes.contains(gene_name))
-        .collect();
-
-      let requested_genes_not_in_genemap = get_requested_genes_not_in_genemap(&gene_map, genes);
-      if !requested_genes_not_in_genemap.is_empty() {
-        warn!(
-          "The following genes were requested through `--genes` \
-           but not found in the gene map: \
+/// Filters genome annotation by requested gene names: keeps only genes listed in `genes`, or all genes if `genes` is `None`.
+pub fn filter_gene_map(gene_map: GeneMap, genes: &Option<Vec<String>>) -> GeneMap {
+  if let Some(genes) = genes {
+    let gene_map: BTreeMap<String, Gene> = gene_map
+      .into_iter_genes()
+      .filter(|(gene_name, ..)| genes.contains(gene_name))
+      .collect();
+
+    let requested_genes_not_in_genemap = get_requested_genes_not_in_genemap(&gene_map, genes);
+    if !requested_genes_not_in_genemap.is_empty() {
+      warn!(
+        "The following genes were requested through `--genes` \
+           but not found in the genome annotation: \
            `{requested_genes_not_in_genemap}`",
-        );
-      }
-      Ok(GeneMap::from_genes(gene_map))
+      );
     }
-
-    // Only gene map is provided. Take all the genes.
-    (Some(gene_map), None) => Ok(gene_map),
-
-    // Gene list is provided, but no gene map. This is illegal.
-    (None, Some(_)) => {
-      make_error!(
-        "List of genes via '--genes' can only be specified \
-         when a gene map (genome annotation) is provided"
-      )
-    }
-
-    // Nothing is provided. Create an empty gene map.
-    // This disables codon-aware alignment, translation, AA mutations, frame shifts, and everything else that relies
-    // on gene information.
-    (None, None) => Ok(GeneMap::new()),
+    return GeneMap::from_genes(gene_map);
   }
+  gene_map
 }
 
 fn get_requested_genes_not_in_genemap(gene_map: &BTreeMap<String, Gene>, genes: &[String]) -> String {
@@ -216,7 +189,7 @@ fn get_requested_genes_not_in_genemap(gene_map: &BTreeMap<String, Gene>, genes:
 
 pub fn convert_feature_tree_to_gene_map(feature_tree: &FeatureTree) -> Result<GeneMap, Report> {
   let seq_region = take_exactly_one(&feature_tree.seq_regions)
-    .wrap_err_with(|| eyre!("Only feature trees with exactly one sequence region are supported. Please keep exactly one sequence region in gene map."))?;
+    .wrap_err_with(|| eyre!("Only feature trees with exactly one sequence region are supported. Please keep exactly one sequence region in genome annotation."))?;
   convert_seq_region_to_gene_map(seq_region)
 }
 
@@ -225,7 +198,7 @@ fn convert_seq_region_to_gene_map(seq_region: &SequenceRegion) -> Result<GeneMap
 
   if genes.is_empty() {
     return make_error!(
-      "Gene map: unable to find any genes or CDSes. Please make sure the genome annotation is correct."
+      "Genome annotation: unable to find any genes or CDSes. Please make sure the genome annotation is correct."
     );
   }
 
diff --git a/packages_rs/nextclade/src/gene/gene_map_display.rs b/packages_rs/nextclade/src/gene/gene_map_display.rs
index 2158fe64d..10079b49b 100644
--- a/packages_rs/nextclade/src/gene/gene_map_display.rs
+++ b/packages_rs/nextclade/src/gene/gene_map_display.rs
@@ -10,7 +10,6 @@ use eyre::Report;
 use itertools::{max as iter_max, Itertools};
 use num_traits::clamp;
 use owo_colors::OwoColorize;
-use regex::internal::Input;
 use std::cmp::{max, min};
 use std::io::Write;
 
diff --git a/packages_rs/nextclade/src/gene/genotype.rs b/packages_rs/nextclade/src/gene/genotype.rs
index eafc72d4e..8d22abb87 100644
--- a/packages_rs/nextclade/src/gene/genotype.rs
+++ b/packages_rs/nextclade/src/gene/genotype.rs
@@ -5,20 +5,59 @@ use crate::make_error;
 use eyre::{Report, WrapErr};
 use lazy_static::lazy_static;
 use regex::Regex;
-use serde::{Deserialize, Serialize};
+use serde::de::Error;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use std::cmp::Ordering;
+use std::fmt::{Display, Formatter};
+use std::str;
 use std::str::FromStr;
 
 const GENOTYPE_REGEX: &str = r"((?P<pos>\d{1,10})(?P<qry>[A-Z-]))";
 
 /// Represents a mutation without reference character known
-#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
+#[derive(Clone, Debug, Default, Eq, PartialEq, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct Genotype<L: Letter<L>> {
   pub pos: NucRefGlobalPosition,
   pub qry: L,
 }
 
+/// Formats the genotype as the position followed by the query letter, with the stored position incremented by one.
+impl<L: Letter<L>> Display for Genotype<L> {
+  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+    write!(f, "{}{}", self.pos + 1, self.qry)
+  }
+}
+
+// NOTE(review): `JsonSchema` is still derived from the struct fields, while the serde impls below read and
+// write a single string — confirm that the generated schema is meant to differ from the wire format.
+/// Deserializes a genotype from a string by delegating to the `FromStr` implementation.
+/// A parse failure is converted into the deserializer's own error type.
+impl<'de, L> Deserialize<'de> for Genotype<L>
+where
+  L: Letter<L>,
+{
+  fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+    let s = String::deserialize(deserializer)?;
+    Genotype::from_str(&s).map_err(Error::custom)
+  }
+}
+
+/// Serializes a genotype as the string produced by its `Display` implementation.
+impl<L> Serialize for Genotype<L>
+where
+  L: Letter<L>,
+{
+  fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
+  where
+    Ser: Serializer,
+  {
+    // `collect_str` feeds the `Display` output straight to the serializer,
+    // without building an intermediate `String` first.
+    serializer.collect_str(self)
+  }
+}
+
 impl<L: Letter<L>> FromStr for Genotype<L> {
   type Err = Report;
 
diff --git a/packages_rs/nextclade/src/graph/edge.rs b/packages_rs/nextclade/src/graph/edge.rs
index 1457a93f5..97bdae479 100644
--- a/packages_rs/nextclade/src/graph/edge.rs
+++ b/packages_rs/nextclade/src/graph/edge.rs
@@ -56,7 +56,7 @@ impl schemars::JsonSchema for GraphEdgeKey {
 /// Edge representing a connection between two nodes. Relevant data can be
 /// stored in the edge atomically. Edge's target and source node's are
 /// weak references and can't outlive the nodes they represent.
-#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 pub struct Edge<E: GraphEdge> {
   key: GraphEdgeKey,
   source: GraphNodeKey,
diff --git a/packages_rs/nextclade/src/graph/graph.rs b/packages_rs/nextclade/src/graph/graph.rs
index 6cc43d408..5ff65d06f 100644
--- a/packages_rs/nextclade/src/graph/graph.rs
+++ b/packages_rs/nextclade/src/graph/graph.rs
@@ -14,7 +14,7 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 
-#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[allow(clippy::partial_pub_fields)]
 pub struct Graph<N, E, D>
 where
@@ -557,6 +557,7 @@ pub fn convert_graph_to_auspice_tree(graph: &AuspiceGraph) -> Result<AuspiceTree
   let root = graph.get_exactly_one_root()?;
   let tree = convert_graph_to_auspice_tree_recursive(graph, root)?;
   Ok(AuspiceTree {
+    version: graph.data.auspice_tree_version.clone(),
     meta: graph.data.meta.clone(),
     tree,
     other: graph.data.other.clone(),
@@ -576,6 +577,7 @@ fn convert_graph_to_auspice_tree_recursive(
 
 pub fn convert_auspice_tree_to_graph(tree: AuspiceTree) -> Result<AuspiceGraph, Report> {
   let mut graph = AuspiceGraph::new(AuspiceGraphMeta {
+    auspice_tree_version: tree.version,
     meta: tree.meta,
     tmp: GraphTempData::default(),
     other: tree.other,
diff --git a/packages_rs/nextclade/src/graph/node.rs b/packages_rs/nextclade/src/graph/node.rs
index b6c2b208a..3f9bbf789 100644
--- a/packages_rs/nextclade/src/graph/node.rs
+++ b/packages_rs/nextclade/src/graph/node.rs
@@ -53,7 +53,7 @@ impl schemars::JsonSchema for GraphNodeKey {
 }
 
 /// Internal representation of a node in a graph
-#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 pub struct Node<N: GraphNode> {
   key: GraphNodeKey,
   #[serde(skip_serializing_if = "is_json_value_null")]
diff --git a/packages_rs/nextclade/src/io/compression.rs b/packages_rs/nextclade/src/io/compression.rs
index fa465b2df..c1e577bf8 100644
--- a/packages_rs/nextclade/src/io/compression.rs
+++ b/packages_rs/nextclade/src/io/compression.rs
@@ -6,8 +6,6 @@ use flate2::read::MultiGzDecoder;
 use flate2::write::GzEncoder;
 use flate2::Compression as GzCompressionLevel;
 use log::debug;
-use num::Integer;
-use num_traits::{FromPrimitive, NumCast, ToPrimitive};
 use std::env;
 use std::io::{ErrorKind, Read, Write};
 use std::path::Path;
@@ -42,7 +40,7 @@ pub enum CompressionType {
   #[cfg(not(target_arch = "wasm32"))]
   Xz,
   #[cfg(not(target_arch = "wasm32"))]
-  Zstandard,
+  Zstd,
 
   Gzip,
   None,
@@ -60,7 +58,9 @@ pub fn guess_compression_from_filepath(filepath: impl AsRef<Path>) -> (Compressi
         #[cfg(not(target_arch = "wasm32"))]
         "xz" => CompressionType::Xz,
         #[cfg(not(target_arch = "wasm32"))]
-        "zst" => CompressionType::Zstandard,
+        "zst" => CompressionType::Zstd,
+        #[cfg(not(target_arch = "wasm32"))]
+        "zstd" => CompressionType::Zstd,
         "gz" => CompressionType::Gzip,
         _ => CompressionType::None,
       };
@@ -89,7 +89,7 @@ impl<'r> Decompressor<'r> {
       #[cfg(not(target_arch = "wasm32"))]
       CompressionType::Xz => Box::new(XzDecoder::new_multi_decoder(reader)),
       #[cfg(not(target_arch = "wasm32"))]
-      CompressionType::Zstandard => Box::new(ZstdDecoder::new(reader)?),
+      CompressionType::Zstd => Box::new(ZstdDecoder::new(reader)?),
       CompressionType::Gzip => Box::new(MultiGzDecoder::new(reader)),
       CompressionType::None => Box::new(reader),
     };
@@ -104,13 +104,13 @@ impl<'r> Decompressor<'r> {
   pub fn from_str_and_path(content: &'r str, filepath: impl AsRef<Path>) -> Result<Self, Report> {
     let filepath = filepath.as_ref();
     let reader = content.as_bytes();
-    let (compression_type, ext) = guess_compression_from_filepath(filepath);
+    let (compression_type, _) = guess_compression_from_filepath(filepath);
     Self::new(reader, &compression_type)
   }
 
   pub fn from_path<R: 'r + Read>(reader: R, filepath: impl AsRef<Path>) -> Result<Self, Report> {
     let filepath = filepath.as_ref();
-    let (compression_type, ext) = guess_compression_from_filepath(filepath);
+    let (compression_type, _) = guess_compression_from_filepath(filepath);
     Self::new(reader, &compression_type)
   }
 }
@@ -155,7 +155,7 @@ impl<'w> Compressor<'w> {
       #[cfg(not(target_arch = "wasm32"))]
       CompressionType::Xz => Box::new(XzEncoder::new(writer, get_comp_level("XZ"))),
       #[cfg(not(target_arch = "wasm32"))]
-      CompressionType::Zstandard => Box::new(ZstdEncoder::new(writer, get_comp_level("ZST"))?.auto_finish()),
+      CompressionType::Zstd => Box::new(ZstdEncoder::new(writer, get_comp_level("ZST"))?.auto_finish()),
       CompressionType::Gzip => Box::new(GzEncoder::new(writer, GzCompressionLevel::new(get_comp_level("GZ")))),
       CompressionType::None => Box::new(writer),
     };
@@ -169,7 +169,7 @@ impl<'w> Compressor<'w> {
 
   pub fn from_path<W: 'w + Write + Send>(writer: W, filepath: impl AsRef<Path>) -> Result<Self, Report> {
     let filepath = filepath.as_ref();
-    let (compression_type, ext) = guess_compression_from_filepath(filepath);
+    let (compression_type, _) = guess_compression_from_filepath(filepath);
     Self::new(writer, &compression_type)
   }
 }
diff --git a/packages_rs/nextclade/src/io/dataset.rs b/packages_rs/nextclade/src/io/dataset.rs
index 8270b972c..8d9a90c65 100644
--- a/packages_rs/nextclade/src/io/dataset.rs
+++ b/packages_rs/nextclade/src/io/dataset.rs
@@ -1,127 +1,337 @@
-use eyre::WrapErr;
+use crate::io::json::json_parse;
+use crate::io::schema_version::{SchemaVersion, SchemaVersionParams};
+use crate::o;
+use eyre::Report;
+use itertools::Itertools;
+use schemars::JsonSchema;
 use semver::Version;
 use serde::{Deserialize, Serialize};
-use std::collections::{BTreeMap, HashMap};
-use std::str::FromStr;
+use std::cmp::Ordering;
+use std::collections::BTreeMap;
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+const INDEX_JSON_SCHEMA_VERSION_FROM: &str = "3.0.0";
+const INDEX_JSON_SCHEMA_VERSION_TO: &str = "3.0.0";
+
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetCompatibilityRange {
-  pub min: Option<String>,
-  pub max: Option<String>,
+pub struct DatasetsIndexJson {
+  pub collections: Vec<DatasetCollection>,
+
+  pub schema_version: String,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub minimizer_index: Vec<MinimizerIndexVersion>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
-#[serde(rename_all = "camelCase")]
-pub struct DatasetCompatibility {
-  pub nextclade_cli: DatasetCompatibilityRange,
-  pub nextclade_web: DatasetCompatibilityRange,
+impl DatasetsIndexJson {
+  pub fn from_str(s: impl AsRef<str>) -> Result<Self, Report> {
+    SchemaVersion::check_warn(
+      &s,
+      &SchemaVersionParams {
+        name: "index.json",
+        ver_from: Some(INDEX_JSON_SCHEMA_VERSION_FROM),
+        ver_to: Some(INDEX_JSON_SCHEMA_VERSION_TO),
+      },
+    ); // no `?` here: whatever the version check reports, parsing below still runs
+    json_parse(s) // parse the same input into `Self`
+  }
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetAttributeValue {
-  pub is_default: bool,
-  pub value: String,
-  pub value_friendly: Option<String>,
+pub struct DatasetCollection {
+  pub meta: DatasetCollectionMeta,
+
+  pub datasets: Vec<Dataset>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetAttributes {
-  pub name: DatasetAttributeValue,
-  pub reference: DatasetAttributeValue,
-  pub tag: DatasetAttributeValue,
+pub struct Dataset {
+  pub path: String,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub deprecated: Option<bool>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub enabled: Option<bool>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub experimental: Option<bool>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub official: Option<bool>,
+
+  pub attributes: DatasetAttributes,
 
-  #[serde(skip_serializing_if = "Option::is_none")]
-  pub url: Option<DatasetAttributeValue>,
+  pub files: DatasetFiles,
+
+  #[serde(default, skip_serializing_if = "DatasetCapabilities::is_default")]
+  pub capabilities: DatasetCapabilities,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub versions: Vec<DatasetVersion>,
+
+  #[serde(default, skip_serializing_if = "DatasetVersion::is_empty")]
+  pub version: DatasetVersion,
 
   #[serde(flatten)]
-  pub rest_attrs: BTreeMap<String, DatasetAttributeValue>,
+  pub other: serde_json::Value,
+}
+
+impl Dataset {
+  pub fn tag(&self) -> &str {
+    &self.version.tag // tag of `self.version`
+  }
+
+  pub fn root_path(&self) -> String {
+    [&self.path, &self.version.tag].iter().join("/") // "<path>/<tag>"
+  }
+
+  pub fn file_path(&self, filename: impl AsRef<str>) -> String {
+    [&self.root_path(), filename.as_ref()].iter().join("/") // "<path>/<tag>/<filename>"
+  }
+
+  pub fn is_cli_compatible(&self, cli_version: &Version) -> bool {
+    self
+      .version
+      .compatibility
+      .as_ref()
+      .map_or(true, |compat| compat.is_cli_compatible(cli_version)) // no compatibility info counts as compatible
+  }
+
+  pub fn is_deprecated(&self) -> bool {
+    self.deprecated.unwrap_or(false) // a missing flag counts as `false`
+  }
+
+  pub fn is_enabled(&self) -> bool {
+    self.enabled.unwrap_or(false) // a missing flag counts as `false`
+  }
+
+  pub fn is_experimental(&self) -> bool {
+    self.experimental.unwrap_or(false) // a missing flag counts as `false`
+  }
+
+  pub fn is_official(&self) -> bool {
+    self.official.unwrap_or(false) // a missing flag counts as `false`
+  }
+
+  pub fn is_community(&self) -> bool {
+    !self.is_official() // "community" is defined as "not official"
+  }
+
+  pub fn is_latest(&self) -> bool {
+    if self.version.tag == "unreleased" || self.version.tag == "latest" {
+      return true; // these two special tags are always treated as latest
+    }
+    self.versions.iter().sorted().next() == Some(&self.version) // NOTE(review): `sorted()` is ascending by tag, so this compares against the smallest tag — confirm that is the newest version
+  }
+
+  pub fn is_tag(&self, tag: impl AsRef<str>) -> bool {
+    let tag = tag.as_ref();
+    self.version.tag == tag // exact match, or "unreleased" and "latest" accepted as aliases of each other
+      || (self.version.tag == "unreleased" && tag == "latest")
+      || (self.version.tag == "latest" && tag == "unreleased")
+  }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DatasetVersion {
+  pub tag: String,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub updated_at: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub compatibility: Option<DatasetCompatibility>,
+}
+
+impl Eq for DatasetVersion {}
+
+impl PartialEq<Self> for DatasetVersion {
+  fn eq(&self, other: &Self) -> bool {
+    (self.tag).eq(&other.tag) // equality is by tag alone; `updated_at` and `compatibility` are ignored
+  }
+}
+
+impl Ord for DatasetVersion {
+  fn cmp(&self, other: &Self) -> Ordering {
+    (self.tag).cmp(&other.tag) // ordered by string comparison of the tags
+  }
+}
+
+impl PartialOrd<Self> for DatasetVersion {
+  fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+    Some(self.cmp(other)) // delegate to `Ord` so that the two orderings can never diverge
+  }
+}
+
+impl DatasetVersion {
+  pub fn is_empty(&self) -> bool {
+    self == &Self::default() // tag-only comparison: true whenever the tag equals the default tag
+  }
+}
+
+impl Default for DatasetVersion {
+  fn default() -> Self {
+    Self {
+      tag: o!("unreleased"),
+      updated_at: None,
+      compatibility: None,
+    }
+  }
 }
 
-// TODO: move to VirusProperties
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetParams {
-  pub default_gene: Option<String>,
-  pub gene_order_preference: Option<Vec<String>>,
+pub struct DatasetCompatibility {
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  #[schemars(with = "String")]
+  pub cli: Option<Version>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  #[schemars(with = "String")]
+  pub web: Option<Version>,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+impl DatasetCompatibility {
+  pub fn is_cli_compatible(&self, cli_version: &Version) -> bool {
+    self
+      .cli
+      .as_ref()
+      .map_or(true, |min_cli_version| cli_version >= min_cli_version) // no `cli` version declared counts as compatible
+  }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct Dataset {
-  pub enabled: bool,
-  pub attributes: DatasetAttributes,
-  pub comment: String,
-  pub compatibility: DatasetCompatibility,
-  pub files: BTreeMap<String, String>,
-  pub params: Option<DatasetParams>,
-  pub zip_bundle: String,
+pub struct DatasetCollectionMeta {
+  pub id: String,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub title: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub description: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub maintainers: Vec<DatasetCollectionUrl>,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub urls: Vec<DatasetCollectionUrl>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-impl Dataset {
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DatasetCapabilities {
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub qc: Vec<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub primers: Option<bool>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+impl DatasetCapabilities {
   #[inline]
-  pub const fn is_latest(&self) -> bool {
-    self.attributes.tag.is_default
+  pub fn is_default(&self) -> bool {
+    self == &Self::default()
   }
+}
 
-  pub fn is_compatible(&self, cli_version: &str) -> bool {
-    let this_version = Version::parse(cli_version)
-      .wrap_err_with(|| format!("Unable to parse version: '{cli_version}'"))
-      .unwrap();
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DatasetFiles {
+  pub reference: String,
 
-    let DatasetCompatibilityRange { min, max } = &self.compatibility.nextclade_cli;
+  pub pathogen_json: String,
 
-    let mut compatible = true;
-    if let Some(min) = min {
-      let min_version = Version::parse(min)
-        .wrap_err_with(|| format!("Unable to parse dataset min version: '{min}'"))
-        .unwrap();
-      compatible = compatible && (this_version >= min_version);
-    }
-    if let Some(max) = max {
-      let max_version = Version::parse(max)
-        .wrap_err_with(|| format!("Unable to parse dataset max version: '{max}'"))
-        .unwrap();
-      compatible = compatible && (this_version < max_version);
-    }
-    compatible
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub genome_annotation: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub tree_json: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub examples: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub readme: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub changelog: Option<String>,
+
+  #[serde(flatten, default, skip_serializing_if = "BTreeMap::is_empty")]
+  pub rest_files: BTreeMap<String, DatasetAttributeValue>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DatasetAttributeValue {
+  pub value: String,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub value_friendly: Option<String>,
+
+  #[serde(default, skip_serializing_if = "Option::is_none")]
+  pub is_default: Option<bool>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+impl DatasetAttributeValue {
+  pub fn is_default(&self) -> bool {
+    self.is_default.unwrap_or(false)
   }
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetsIndexJson {
-  pub schema: String,
-  pub datasets: Vec<Dataset>,
+pub struct DatasetAttributes {
+  pub name: DatasetAttributeValue,
+
+  pub reference: DatasetAttributeValue,
+
+  #[serde(flatten, default, skip_serializing_if = "BTreeMap::is_empty")]
+  pub rest_attrs: BTreeMap<String, DatasetAttributeValue>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
-pub struct DatasetFileUrls {
-  pub ref_record: String,
-  pub virus_properties: String,
-  pub tree: String,
-  pub gene_map: String,
-  pub qc_config: String,
-  pub primers: String,
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DatasetCollectionUrl {
+  pub name: String,
+
+  pub url: String,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-// TODO: consider replacing the same fields in DatasetsIndexJson with this struct
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct DatasetTagJson {
-  pub enabled: bool,
-  pub attributes: DatasetAttributes,
-  pub comment: String,
-  #[serde(skip_serializing_if = "Option::is_none")]
-  pub compatibility: Option<DatasetCompatibility>,
-  pub files: DatasetFileUrls,
-  pub params: DatasetParams,
-  #[serde(skip_serializing_if = "Option::is_none")]
-  pub zip_bundle: Option<String>,
-  #[serde(skip_serializing_if = "serde_json::Value::is_null")]
-  pub metadata: serde_json::Value,
+pub struct MinimizerIndexVersion {
+  pub version: String,
+  pub path: String,
   #[serde(flatten)]
   pub other: serde_json::Value,
 }
diff --git a/packages_rs/nextclade/src/io/errors_csv.rs b/packages_rs/nextclade/src/io/errors_csv.rs
deleted file mode 100644
index 8c37cced9..000000000
--- a/packages_rs/nextclade/src/io/errors_csv.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-use crate::gene::gene_map::GeneMap;
-use crate::io::csv::{CsvStructFileWriter, CsvStructWriter};
-use crate::io::nextclade_csv::format_failed_genes;
-use crate::types::outputs::PeptideWarning;
-use eyre::Report;
-use itertools::Itertools;
-use serde::{Deserialize, Serialize};
-use std::path::Path;
-
-#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct ErrorCsvEntry<'a, 'b> {
-  pub seq_name: &'a str,
-  pub errors: &'a str,
-  pub warnings: &'b str,
-  pub failed_genes: &'b str,
-}
-
-/// Writes errors.csv file
-pub struct ErrorsCsvWriter<'a> {
-  gene_map: &'a GeneMap,
-  writer: CsvStructFileWriter,
-}
-
-impl<'a> ErrorsCsvWriter<'a> {
-  pub fn new(gene_map: &'a GeneMap, filepath: impl AsRef<Path>) -> Result<Self, Report> {
-    Ok(Self {
-      gene_map,
-      writer: CsvStructFileWriter::new(filepath.as_ref(), b',')?,
-    })
-  }
-
-  /// Writes one row into errors.csv file for the case of nuc alignment error
-  pub fn write_nuc_error(&mut self, seq_name: &str, message: &str) -> Result<(), Report> {
-    self.writer.write(&ErrorCsvEntry {
-      seq_name,
-      errors: message,
-      warnings: "",
-      failed_genes: "",
-    })
-  }
-
-  /// Writes one row into errors.csv file for the case of aa alignment errors
-  pub fn write_aa_errors(
-    &mut self,
-    seq_name: &str,
-    warnings: &[PeptideWarning],
-    failed_genes: &[String],
-  ) -> Result<(), Report> {
-    let warnings = &warnings.iter().map(|PeptideWarning { warning, .. }| warning).join(";");
-    let failed_genes = &format_failed_genes(failed_genes, ";");
-    self.writer.write(&ErrorCsvEntry {
-      seq_name,
-      errors: "",
-      warnings,
-      failed_genes,
-    })
-  }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct ErrorsFromWeb {
-  seq_name: String,
-  errors: String,
-  warnings: Vec<PeptideWarning>,
-  failed_genes: Vec<String>,
-}
-
-pub fn errors_to_csv_string(errors: &[ErrorsFromWeb]) -> Result<String, Report> {
-  let mut buf = Vec::<u8>::new();
-
-  {
-    let mut writer = CsvStructWriter::new(&mut buf, b',')?;
-
-    for error in errors {
-      let warnings = &error
-        .warnings
-        .iter()
-        .map(|PeptideWarning { warning, .. }| warning)
-        .join(";");
-
-      let failed_genes = &format_failed_genes(&error.failed_genes, ";");
-
-      writer.write(&ErrorCsvEntry {
-        seq_name: &error.seq_name,
-        errors: &error.errors,
-        warnings,
-        failed_genes,
-      })?;
-    }
-  }
-
-  Ok(String::from_utf8(buf)?)
-}
diff --git a/packages_rs/nextclade/src/io/fasta.rs b/packages_rs/nextclade/src/io/fasta.rs
index 8ead69db4..9167cc1d7 100644
--- a/packages_rs/nextclade/src/io/fasta.rs
+++ b/packages_rs/nextclade/src/io/fasta.rs
@@ -59,8 +59,8 @@ impl<'a> FastaReader<'a> {
     }
   }
 
-  pub fn from_str(contents: &'a str) -> Result<Self, Report> {
-    let reader = contents.as_bytes();
+  pub fn from_str(contents: &'a impl AsRef<str>) -> Result<Self, Report> {
+    let reader = contents.as_ref().as_bytes();
     Ok(Self::new(Box::new(reader)))
   }
 
@@ -157,8 +157,8 @@ pub fn read_many_fasta<P: AsRef<Path>>(filepaths: &[P]) -> Result<Vec<FastaRecor
   Ok(fasta_records)
 }
 
-pub fn read_one_fasta_str(contents: &str) -> Result<FastaRecord, Report> {
-  let mut reader = FastaReader::from_str(contents)?;
+pub fn read_one_fasta_str(contents: impl AsRef<str>) -> Result<FastaRecord, Report> {
+  let mut reader = FastaReader::from_str(&contents)?;
   let mut record = FastaRecord::default();
   reader.read(&mut record)?;
   Ok(record)
diff --git a/packages_rs/nextclade/src/io/fs.rs b/packages_rs/nextclade/src/io/fs.rs
index 4120049a9..6c2aaff32 100644
--- a/packages_rs/nextclade/src/io/fs.rs
+++ b/packages_rs/nextclade/src/io/fs.rs
@@ -89,3 +89,14 @@ pub fn read_reader_to_string(reader: impl Read) -> Result<String, Report> {
   reader.read_to_string(&mut data)?;
   Ok(data)
 }
+
+/// Converts a path to an owned `String`.
+///
+/// Returns an error if the path is not valid Unicode.
+pub fn path_to_string(p: impl AsRef<Path>) -> Result<String, Report> {
+  let path = p.as_ref();
+  match path.as_os_str().to_str() {
+    Some(path_str) => Ok(path_str.to_owned()),
+    None => Err(eyre!("Unable to convert path to string: {:#?}", path)),
+  }
+}
diff --git a/packages_rs/nextclade/src/io/insertions_csv.rs b/packages_rs/nextclade/src/io/insertions_csv.rs
deleted file mode 100644
index 8fc30bbc6..000000000
--- a/packages_rs/nextclade/src/io/insertions_csv.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-use crate::align::insertions_strip::{AaIns, Insertion};
-use crate::alphabet::nuc::Nuc;
-use crate::io::csv::{CsvStructFileWriter, CsvStructWriter};
-use crate::io::nextclade_csv::{format_aa_insertions, format_nuc_insertions};
-use crate::types::outputs::{
-  combine_outputs_and_errors_sorted, NextcladeErrorOutputs, NextcladeOutputOrError, NextcladeOutputs,
-};
-use eyre::Report;
-use serde::{Deserialize, Serialize};
-use std::path::Path;
-
-#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct InsertionCsvEntry<'a> {
-  pub seq_name: &'a str,
-  pub insertions: String,
-  pub aa_insertions: String,
-}
-
-/// Writes insertions.csv file
-pub struct InsertionsCsvWriter {
-  writer: CsvStructFileWriter,
-}
-
-impl InsertionsCsvWriter {
-  pub fn new(filepath: impl AsRef<Path>) -> Result<Self, Report> {
-    Ok(Self {
-      writer: CsvStructFileWriter::new(filepath.as_ref(), b',')?,
-    })
-  }
-
-  /// Writes one row into insertions.csv file
-  pub fn write(
-    &mut self,
-    seq_name: &str,
-    nuc_insertions: &[Insertion<Nuc>],
-    aa_insertions: &[AaIns],
-  ) -> Result<(), Report> {
-    self.writer.write(&InsertionCsvEntry {
-      seq_name,
-      insertions: format_nuc_insertions(nuc_insertions, ";"),
-      aa_insertions: format_aa_insertions(aa_insertions, ";"),
-    })
-  }
-}
-
-pub fn insertions_to_csv_string(
-  outputs: &[NextcladeOutputs],
-  errors: &[NextcladeErrorOutputs],
-) -> Result<String, Report> {
-  let mut buf = Vec::<u8>::new();
-  {
-    let mut writer = CsvStructWriter::new(&mut buf, b',')?;
-
-    let outputs_or_errors = combine_outputs_and_errors_sorted(outputs, errors);
-
-    for (_, output_or_error) in outputs_or_errors {
-      match output_or_error {
-        NextcladeOutputOrError::Outputs(output) => {
-          writer.write(&InsertionCsvEntry {
-            seq_name: &output.seq_name,
-            insertions: format_nuc_insertions(&output.insertions, ";"),
-            aa_insertions: format_aa_insertions(&output.aa_insertions, ";"),
-          })?;
-        }
-        NextcladeOutputOrError::Error(error) => {
-          writer.write(&InsertionCsvEntry {
-            seq_name: &error.seq_name,
-            insertions: "".to_owned(),
-            aa_insertions: "".to_owned(),
-          })?;
-        }
-      }
-    }
-  }
-  Ok(String::from_utf8(buf)?)
-}
diff --git a/packages_rs/nextclade/src/io/json.rs b/packages_rs/nextclade/src/io/json.rs
index 356c5d8c1..553e3a2b1 100644
--- a/packages_rs/nextclade/src/io/json.rs
+++ b/packages_rs/nextclade/src/io/json.rs
@@ -1,4 +1,5 @@
 use crate::io::file::create_file_or_stdout;
+use crate::io::yaml::yaml_write;
 use eyre::{Report, WrapErr};
 use serde::{Deserialize, Serialize};
 use serde_json::{de::Read, Deserializer};
@@ -25,8 +26,8 @@ pub fn deserialize_without_recursion_limit<'de, R: Read<'de>, T: Deserialize<'de
   Ok(obj)
 }
 
-pub fn json_parse<T: for<'de> Deserialize<'de>>(s: &str) -> Result<T, Report> {
-  let mut de = Deserializer::from_str(s);
+pub fn json_parse<T: for<'de> Deserialize<'de>>(s: impl AsRef<str>) -> Result<T, Report> {
+  let mut de = Deserializer::from_str(s.as_ref());
   deserialize_without_recursion_limit(&mut de)
 }
 
@@ -61,3 +62,13 @@ pub fn json_write<T: Serialize>(filepath: impl AsRef<Path>, obj: &T, pretty: Jso
   let file = create_file_or_stdout(filepath)?;
   json_write_impl(file, &obj, pretty).wrap_err("When writing JSON to file: {filepath:#?}")
 }
+
+/// Writes `obj` to `filepath`: as YAML when the file extension is `yaml` or `yml`, otherwise as pretty-printed JSON.
+pub fn json_or_yaml_write<T: Serialize>(filepath: impl AsRef<Path>, obj: &T) -> Result<(), Report> {
+  let filepath = filepath.as_ref();
+  // Match on the actual extension: a plain suffix test would also accept names such as "out.notyaml"
+  match filepath.extension().and_then(|ext| ext.to_str()) {
+    Some("yaml" | "yml") => yaml_write(filepath, &obj),
+    _ => json_write(filepath, &obj, JsonPretty(true)),
+  }
+}
diff --git a/packages_rs/nextclade/src/io/mod.rs b/packages_rs/nextclade/src/io/mod.rs
index 9d59aa9fc..191448cfb 100644
--- a/packages_rs/nextclade/src/io/mod.rs
+++ b/packages_rs/nextclade/src/io/mod.rs
@@ -3,16 +3,15 @@ pub mod concat;
 pub mod console;
 pub mod csv;
 pub mod dataset;
-pub mod errors_csv;
 pub mod fasta;
 pub mod file;
 pub mod fs;
 pub mod gff3;
-pub mod insertions_csv;
 pub mod json;
 pub mod ndjson;
 pub mod nextclade_csv;
 pub mod nwk_writer;
 pub mod parse_pos;
 pub mod results_json;
+pub mod schema_version;
 pub mod yaml;
diff --git a/packages_rs/nextclade/src/io/nextclade_csv.rs b/packages_rs/nextclade/src/io/nextclade_csv.rs
index 676b736a7..b01cba894 100644
--- a/packages_rs/nextclade/src/io/nextclade_csv.rs
+++ b/packages_rs/nextclade/src/io/nextclade_csv.rs
@@ -24,7 +24,6 @@ use eyre::Report;
 use indexmap::{indexmap, IndexMap};
 use itertools::{chain, Either, Itertools};
 use lazy_static::lazy_static;
-use regex::internal::Input;
 use serde::{Deserialize, Serialize};
 use std::borrow::Cow;
 use std::fmt::Display;
diff --git a/packages_rs/nextclade/src/io/results_json.rs b/packages_rs/nextclade/src/io/results_json.rs
index f4228900e..61ef82ab9 100644
--- a/packages_rs/nextclade/src/io/results_json.rs
+++ b/packages_rs/nextclade/src/io/results_json.rs
@@ -9,6 +9,7 @@ use crate::utils::datetime::date_iso_now;
 use eyre::Report;
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
+use crate::utils::info::this_package_version_str;
 
 #[derive(Serialize, Deserialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
@@ -33,11 +34,9 @@ pub struct ResultsJson {
 
 impl ResultsJson {
   pub fn new(clade_node_attrs: &[CladeNodeAttrKeyDesc], phenotype_attr_keys: &[PhenotypeAttrDesc]) -> Self {
-    const VERSION: &str = env!("CARGO_PKG_VERSION");
-
     Self {
-      schema_version: "1.0.0".to_owned(),
-      nextclade_algo_version: VERSION.to_owned(),
+      schema_version: "3.0.0".to_owned(),
+      nextclade_algo_version: this_package_version_str().to_owned(),
       nextclade_web_version: None,
       created_at: date_iso_now(),
       clade_node_attr_keys: clade_node_attrs.to_vec(),
diff --git a/packages_rs/nextclade/src/io/schema_version.rs b/packages_rs/nextclade/src/io/schema_version.rs
new file mode 100644
index 000000000..36fc3f056
--- /dev/null
+++ b/packages_rs/nextclade/src/io/schema_version.rs
@@ -0,0 +1,49 @@
+use crate::io::json::json_parse;
+use crate::make_error;
+use crate::utils::error::report_to_string;
+use eyre::Report;
+use log::warn;
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SchemaVersion {
+  pub schema_version: String,
+}
+
+pub struct SchemaVersionParams<'s> {
+  pub name: &'s str,
+  pub ver_from: Option<&'s str>,
+  pub ver_to: Option<&'s str>,
+}
+
+impl SchemaVersion {
+  /// Parse JSON and check its `schemaVersion` field against the supported range (`ver_from` to `ver_to`, inclusive, each bound optional). Returns the parsed version, or an error if parsing fails or the version is out of range.
+  pub fn check_err(
+    json_str: impl AsRef<str>,
+    SchemaVersionParams { name, ver_from, ver_to }: &SchemaVersionParams,
+  ) -> Result<SchemaVersion, Report> {
+    let sv: SchemaVersion = json_parse(json_str)?;
+    // Compare numerically per dot-separated component (as plain strings, "10.0.0" sorts before "3.0.0"). A non-numeric component maps to `None`, which orders below any number.
+    let ver_key = |ver: &str| ver.split('.').map(|part| part.parse::<u64>().ok()).collect::<Vec<_>>();
+
+    if let Some(ver_to) = ver_to {
+      if ver_key(sv.schema_version.as_str()) > ver_key(ver_to) {
+        return make_error!("The format version of '{}' file (schemaVersion={}) is newer than maximum version supported by this version of Nextclade (schemaVersion={}). This likely means that there are newer versions of Nextclade available which support this new format. In case of issues, please upgrade Nextclade to avoid incompatibility and to receive the latest features and bug fixes. Alternatively, you might try to use earlier versions of the dataset (not recommended).", name, sv.schema_version, ver_to);
+      }
+    }
+    if let Some(ver_from) = ver_from {
+      if ver_key(sv.schema_version.as_str()) < ver_key(ver_from) {
+        return make_error!("The format version of '{}' file (schemaVersion={}) is older than minimum version supported by this version of Nextclade (schemaVersion={}). This likely means that this version of Nextclade will have problems reading and understanding this file. In case of issues, please upgrade the dataset to avoid incompatibility and to receive the latest features and bug fixes. Alternatively, you might try to use earlier versions of Nextclade (not recommended).", name, sv.schema_version, ver_from);
+      }
+    }
+    Ok(sv)
+  }
+
+  /// Run the same check as `check_err`, but log a warning instead of returning the error (this includes JSON parsing failures).
+  pub fn check_warn(json_str: impl AsRef<str>, params: &SchemaVersionParams) {
+    if let Err(report) = Self::check_err(json_str, params) {
+      warn!("{}", report_to_string(&report));
+    }
+  }
+}
diff --git a/packages_rs/nextclade/src/lib.rs b/packages_rs/nextclade/src/lib.rs
index 4ad4d2a09..479947cb6 100644
--- a/packages_rs/nextclade/src/lib.rs
+++ b/packages_rs/nextclade/src/lib.rs
@@ -9,6 +9,7 @@ pub mod graph;
 pub mod io;
 pub mod qc;
 pub mod run;
+pub mod sort;
 pub mod translate;
 pub mod tree;
 pub mod types;
diff --git a/packages_rs/nextclade/src/qc/qc_config.rs b/packages_rs/nextclade/src/qc/qc_config.rs
index f80500aeb..208541fa9 100644
--- a/packages_rs/nextclade/src/qc/qc_config.rs
+++ b/packages_rs/nextclade/src/qc/qc_config.rs
@@ -79,6 +79,7 @@ pub struct FrameShiftLocation {
 #[serde(default)]
 pub struct QcRulesConfigFrameShifts {
   pub enabled: bool,
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub ignored_frame_shifts: Vec<FrameShiftLocation>,
   pub score_weight: f64,
 }
@@ -105,6 +106,7 @@ pub struct StopCodonLocation {
 #[serde(default)]
 pub struct QcRulesConfigStopCodons {
   pub enabled: bool,
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub ignored_stop_codons: Vec<StopCodonLocation>,
   pub score_weight: f64,
 }
diff --git a/packages_rs/nextclade/src/qc/qc_run.rs b/packages_rs/nextclade/src/qc/qc_run.rs
index d23cdc727..e9d91e03c 100644
--- a/packages_rs/nextclade/src/qc/qc_run.rs
+++ b/packages_rs/nextclade/src/qc/qc_run.rs
@@ -8,7 +8,7 @@ use crate::qc::qc_rule_private_mutations::{rule_private_mutations, QcResultPriva
 use crate::qc::qc_rule_snp_clusters::{rule_snp_clusters, QcResultSnpClusters};
 use crate::qc::qc_rule_stop_codons::{rule_stop_codons, QcResultStopCodons};
 use crate::translate::frame_shifts_translate::FrameShift;
-use crate::translate::translate_genes::{CdsTranslation, Translation};
+use crate::translate::translate_genes::Translation;
 use num::traits::Pow;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
diff --git a/packages_rs/nextclade/src/run/mod.rs b/packages_rs/nextclade/src/run/mod.rs
index 40660bc21..ee178ca68 100644
--- a/packages_rs/nextclade/src/run/mod.rs
+++ b/packages_rs/nextclade/src/run/mod.rs
@@ -1,3 +1,4 @@
-pub mod nextalign_run_one;
 pub mod nextclade_run_one;
 pub mod nextclade_wasm;
+pub mod params;
+pub mod params_general;
diff --git a/packages_rs/nextclade/src/run/nextalign_run_one.rs b/packages_rs/nextclade/src/run/nextalign_run_one.rs
deleted file mode 100644
index 3e97412cd..000000000
--- a/packages_rs/nextclade/src/run/nextalign_run_one.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-use crate::align::align::align_nuc;
-use crate::align::insertions_strip::{get_aa_insertions, insertions_strip};
-use crate::align::params::AlignPairwiseParams;
-use crate::align::seed_match2::CodonSpacedIndex;
-use crate::alphabet::nuc::Nuc;
-use crate::coord::coord_map_global::CoordMapGlobal;
-use crate::gene::gene_map::GeneMap;
-use crate::translate::translate_genes::{translate_genes, Translation};
-use crate::types::outputs::{NextalignOutputs, PeptideWarning};
-use eyre::Report;
-use itertools::Itertools;
-use std::collections::HashSet;
-
-pub fn nextalign_run_one(
-  index: usize,
-  seq_name: &str,
-  qry_seq: &[Nuc],
-  ref_seq: &[Nuc],
-  seed_index: &CodonSpacedIndex,
-  ref_peptides: &Translation,
-  gene_map: &GeneMap,
-  gap_open_close_nuc: &[i32],
-  gap_open_close_aa: &[i32],
-  params: &AlignPairwiseParams,
-) -> Result<NextalignOutputs, Report> {
-  match align_nuc(
-    index,
-    seq_name,
-    qry_seq,
-    ref_seq,
-    seed_index,
-    gap_open_close_nuc,
-    params,
-  ) {
-    Err(report) => Err(report),
-
-    Ok(alignment) => {
-      let coord_map_global = CoordMapGlobal::new(&alignment.ref_seq);
-
-      let translation = translate_genes(
-        &alignment.qry_seq,
-        &alignment.ref_seq,
-        ref_peptides,
-        gene_map,
-        &coord_map_global,
-        gap_open_close_aa,
-        params,
-      )?;
-
-      let stripped = insertions_strip(&alignment.qry_seq, &alignment.ref_seq);
-
-      let present_genes: HashSet<String> = translation
-        .iter_genes()
-        .flat_map(|(_, gene_tr)| gene_tr.cdses.iter().map(|(_, cds_tr)| cds_tr.name.clone()))
-        .collect();
-
-      let missing_genes = gene_map
-        .iter_genes()
-        .filter_map(|(gene_name, _)| (!present_genes.contains(gene_name)).then_some(gene_name))
-        .cloned()
-        .collect_vec();
-
-      let is_reverse_complement = alignment.is_reverse_complement;
-
-      let warnings = {
-        let mut warnings = translation
-          .iter_genes()
-          .flat_map(|(_, gene_tr)| gene_tr.warnings.clone())
-          .collect_vec();
-
-        if is_reverse_complement {
-          warnings.push(PeptideWarning {
-            gene_name: "nuc".to_owned(),
-            warning: format!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to sequence ID.")
-          });
-        }
-
-        warnings
-      };
-
-      let aa_insertions = get_aa_insertions(&translation);
-
-      Ok(NextalignOutputs {
-        alignment,
-        stripped,
-        translation,
-        aa_insertions,
-        warnings,
-        missing_genes,
-        is_reverse_complement,
-        coord_map_global,
-      })
-    }
-  }
-}
diff --git a/packages_rs/nextclade/src/run/nextclade_run_one.rs b/packages_rs/nextclade/src/run/nextclade_run_one.rs
index 4dd166ab4..fc6931fe3 100644
--- a/packages_rs/nextclade/src/run/nextclade_run_one.rs
+++ b/packages_rs/nextclade/src/run/nextclade_run_one.rs
@@ -1,78 +1,105 @@
-use crate::align::insertions_strip::NucIns;
-use crate::align::params::AlignPairwiseParams;
-use crate::align::seed_match2::CodonSpacedIndex;
+use crate::align::align::align_nuc;
+use crate::align::insertions_strip::{get_aa_insertions, insertions_strip, AaIns, NucIns};
 use crate::alphabet::aa::Aa;
 use crate::alphabet::letter::Letter;
 use crate::alphabet::nuc::Nuc;
-use crate::analyze::aa_changes::{find_aa_changes, FindAaChangesOutput};
+use crate::analyze::aa_changes::{find_aa_changes, AaChangesGroup, FindAaChangesOutput};
+use crate::analyze::aa_del::AaDel;
+use crate::analyze::aa_sub::AaSub;
 use crate::analyze::divergence::calculate_branch_length;
 use crate::analyze::find_aa_motifs::find_aa_motifs;
-use crate::analyze::find_aa_motifs_changes::{find_aa_motifs_changes, AaMotifsMap};
-use crate::analyze::find_private_aa_mutations::find_private_aa_mutations;
-use crate::analyze::find_private_nuc_mutations::find_private_nuc_mutations;
+use crate::analyze::find_aa_motifs_changes::find_aa_motifs_changes;
+use crate::analyze::find_private_aa_mutations::{find_private_aa_mutations, PrivateAaMutations};
+use crate::analyze::find_private_nuc_mutations::{find_private_nuc_mutations, PrivateNucMutations};
 use crate::analyze::letter_composition::get_letter_composition;
-use crate::analyze::letter_ranges::{find_aa_letter_ranges, find_letter_ranges, find_letter_ranges_by, NucRange};
+use crate::analyze::letter_ranges::{
+  find_aa_letter_ranges, find_letter_ranges, find_letter_ranges_by, GeneAaRange, NucRange,
+};
 use crate::analyze::nuc_changes::{find_nuc_changes, FindNucChangesOutput};
 use crate::analyze::nuc_del::NucDelRange;
 use crate::analyze::pcr_primer_changes::get_pcr_primer_changes;
-use crate::analyze::pcr_primers::PcrPrimer;
 use crate::analyze::phenotype::calculate_phenotype;
-use crate::analyze::virus_properties::{PhenotypeData, VirusProperties};
-use crate::gene::gene_map::GeneMap;
-use crate::qc::qc_config::QcConfig;
+use crate::analyze::virus_properties::PhenotypeData;
+use crate::coord::coord_map_global::CoordMapGlobal;
+use crate::coord::range::AaRefRange;
+use crate::graph::node::GraphNodeKey;
 use crate::qc::qc_run::qc_run;
-use crate::run::nextalign_run_one::nextalign_run_one;
-use crate::translate::aa_alignment_ranges::{
-  calculate_aa_alignment_ranges_in_place, gather_aa_alignment_ranges, GatherAaAlignmentRangesResult,
-};
+use crate::run::nextclade_wasm::{AnalysisOutput, Nextclade};
+use crate::translate::aa_alignment_ranges::{gather_aa_alignment_ranges, GatherAaAlignmentRangesResult};
 use crate::translate::frame_shifts_flatten::frame_shifts_flatten;
-use crate::translate::translate_genes::Translation;
-use crate::tree::tree::AuspiceGraph;
+use crate::translate::frame_shifts_translate::FrameShift;
+use crate::translate::translate_genes::{translate_genes, Translation};
 use crate::tree::tree_find_nearest_node::graph_find_nearest_nodes;
-use crate::types::outputs::{NextalignOutputs, NextcladeOutputs, PhenotypeValue};
+use crate::types::outputs::{NextcladeOutputs, PeptideWarning, PhenotypeValue};
 use eyre::Report;
 use itertools::Itertools;
+use std::collections::{BTreeMap, HashSet};
+
+#[derive(Default)]
+struct NextcladeResultWithAa {
+  translation: Translation,
+  aa_changes_groups: Vec<AaChangesGroup>,
+  aa_substitutions: Vec<AaSub>,
+  aa_deletions: Vec<AaDel>,
+  total_aminoacid_substitutions: usize,
+  total_aminoacid_deletions: usize,
+  total_aminoacid_insertions: usize,
+  nuc_to_aa_muts: BTreeMap<String, Vec<AaSub>>,
+  missing_genes: Vec<String>,
+  present_genes: HashSet<String>,
+  warnings: Vec<PeptideWarning>,
+  aa_insertions: Vec<AaIns>,
+  frame_shifts: Vec<FrameShift>,
+  total_frame_shifts: usize,
+  unknown_aa_ranges: Vec<GeneAaRange>,
+  total_unknown_aa: usize,
+  aa_alignment_ranges: BTreeMap<String, Vec<AaRefRange>>,
+  aa_unsequenced_ranges: BTreeMap<String, Vec<AaRefRange>>,
+}
+
+#[derive(Default)]
+struct NextcladeResultWithGraph {
+  clade: String,
+  private_nuc_mutations: PrivateNucMutations,
+  private_aa_mutations: BTreeMap<String, PrivateAaMutations>,
+  phenotype_values: Option<Vec<PhenotypeValue>>,
+  divergence: f64,
+  custom_node_attributes: BTreeMap<String, String>,
+  nearest_node_id: GraphNodeKey,
+  nearest_nodes: Option<Vec<String>>,
+}
 
 pub fn nextclade_run_one(
   index: usize,
   seq_name: &str,
   qry_seq: &[Nuc],
-  ref_seq: &[Nuc],
-  seed_index: &CodonSpacedIndex,
-  ref_peptides: &Translation,
-  aa_motifs_ref: &AaMotifsMap,
-  gene_map: &GeneMap,
-  primers: &[PcrPrimer],
-  graph: &AuspiceGraph,
-  qc_config: &QcConfig,
-  virus_properties: &VirusProperties,
-  gap_open_close_nuc: &[i32],
-  gap_open_close_aa: &[i32],
-  params: &AlignPairwiseParams,
-  include_nearest_node_info: bool,
-) -> Result<(Vec<Nuc>, Translation, NextcladeOutputs), Report> {
-  let NextalignOutputs {
-    alignment,
-    stripped,
-    mut translation,
-    aa_insertions,
-    warnings,
-    missing_genes,
-    is_reverse_complement,
-    coord_map_global,
-  } = nextalign_run_one(
+  state: &Nextclade,
+) -> Result<AnalysisOutput, Report> {
+  let Nextclade {
+    ref_seq,
+    seed_index,
+    gap_open_close_nuc,
+    virus_properties,
+    params,
+    gene_map,
+    gap_open_close_aa,
+    ref_translation,
+    aa_motifs_ref,
+    graph,
+    ..
+  } = &state;
+
+  let alignment = align_nuc(
     index,
     seq_name,
     qry_seq,
     ref_seq,
     seed_index,
-    ref_peptides,
-    gene_map,
     gap_open_close_nuc,
-    gap_open_close_aa,
-    params,
+    &params.alignment,
   )?;
 
+  let stripped = insertions_strip(&alignment.qry_seq, &alignment.ref_seq);
   let alignment_score = alignment.alignment_score;
 
   let FindNucChangesOutput {
@@ -81,8 +108,6 @@ pub fn nextclade_run_one(
     alignment_range,
   } = find_nuc_changes(&stripped.qry_seq, ref_seq);
 
-  calculate_aa_alignment_ranges_in_place(&alignment_range, &mut translation, gene_map)?;
-
   let total_substitutions = substitutions.len();
   let total_deletions = deletions.iter().map(NucDelRange::len).sum();
 
@@ -97,129 +122,243 @@ pub fn nextclade_run_one(
 
   let nucleotide_composition = get_letter_composition(&stripped.qry_seq);
 
-  let pcr_primer_changes = get_pcr_primer_changes(&substitutions, primers);
+  let pcr_primer_changes = get_pcr_primer_changes(&substitutions, &virus_properties.primers);
   let total_pcr_primer_changes = pcr_primer_changes.iter().map(|pc| pc.substitutions.len()).sum();
 
-  let frame_shifts = frame_shifts_flatten(&translation);
-  let total_frame_shifts = frame_shifts.len();
+  let total_aligned_nucs = alignment_range.len();
+  let total_covered_nucs = total_aligned_nucs - total_missing - total_non_acgtns;
+  let coverage = total_covered_nucs as f64 / ref_seq.len() as f64;
 
-  let FindAaChangesOutput {
+  let NextcladeResultWithAa {
+    translation,
     aa_changes_groups,
     aa_substitutions,
     aa_deletions,
+    total_aminoacid_substitutions,
+    total_aminoacid_deletions,
+    total_aminoacid_insertions,
     nuc_to_aa_muts,
-  } = find_aa_changes(
-    ref_seq,
-    &stripped.qry_seq,
-    ref_peptides,
-    &translation,
-    gene_map,
-    &substitutions,
-    &deletions,
-  )?;
+    missing_genes,
+    warnings,
+    aa_insertions,
+    frame_shifts,
+    total_frame_shifts,
+    unknown_aa_ranges,
+    total_unknown_aa,
+    aa_alignment_ranges,
+    aa_unsequenced_ranges,
+    ..
+  } = if !gene_map.is_empty() {
+    let coord_map_global = CoordMapGlobal::new(&alignment.ref_seq);
 
-  let total_aminoacid_substitutions = aa_substitutions.len();
-  let total_aminoacid_deletions = aa_deletions.len();
-  let total_aminoacid_insertions = aa_insertions.len();
+    let translation = translate_genes(
+      &alignment.qry_seq,
+      &alignment.ref_seq,
+      ref_translation,
+      gene_map,
+      &coord_map_global,
+      &alignment_range,
+      gap_open_close_aa,
+      &params.alignment,
+    )?;
 
-  let unknown_aa_ranges = find_aa_letter_ranges(&translation, Aa::X);
-  let total_unknown_aa = unknown_aa_ranges.iter().map(|r| r.length).sum();
+    let present_genes: HashSet<String> = translation
+      .iter_genes()
+      .flat_map(|(_, gene_tr)| gene_tr.cdses.iter().map(|(_, cds_tr)| cds_tr.name.clone()))
+      .collect();
 
-  let nearest_node_candidates = graph_find_nearest_nodes(
-    graph,
-    &substitutions,
-    &missing,
-    &alignment_range,
-    &virus_properties.placement_mask_ranges,
-  )?;
-  let nearest_node_key = nearest_node_candidates[0].node_key;
-  let nearest_node = graph.get_node(nearest_node_key)?.payload();
+    let missing_genes = gene_map
+      .iter_genes()
+      .filter_map(|(gene_name, _)| (!present_genes.contains(gene_name)).then_some(gene_name))
+      .cloned()
+      .collect_vec();
+
+    let warnings = {
+      let mut warnings = translation
+        .iter_genes()
+        .flat_map(|(_, gene_tr)| gene_tr.warnings.clone())
+        .collect_vec();
+
+      if alignment.is_reverse_complement {
+        warnings.push(PeptideWarning {
+            gene_name: "nuc".to_owned(),
+            warning: format!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to sequence ID.")
+          });
+      }
+
+      warnings
+    };
+
+    let aa_insertions = get_aa_insertions(&translation);
+
+    let frame_shifts = frame_shifts_flatten(&translation);
+    let total_frame_shifts = frame_shifts.len();
+
+    let FindAaChangesOutput {
+      aa_changes_groups,
+      aa_substitutions,
+      aa_deletions,
+      nuc_to_aa_muts,
+    } = find_aa_changes(
+      ref_seq,
+      &stripped.qry_seq,
+      ref_translation,
+      &translation,
+      gene_map,
+      &substitutions,
+      &deletions,
+    )?;
+
+    let total_aminoacid_substitutions = aa_substitutions.len();
+    let total_aminoacid_deletions = aa_deletions.len();
+    let total_aminoacid_insertions = aa_insertions.len();
+
+    let unknown_aa_ranges = find_aa_letter_ranges(&translation, Aa::X);
+    let total_unknown_aa = unknown_aa_ranges.iter().map(|r| r.length).sum();
+
+    let GatherAaAlignmentRangesResult {
+      aa_alignment_ranges,
+      aa_unsequenced_ranges,
+    } = gather_aa_alignment_ranges(&translation, gene_map);
+
+    NextcladeResultWithAa {
+      translation,
+      aa_changes_groups,
+      aa_substitutions,
+      aa_deletions,
+      total_aminoacid_substitutions,
+      total_aminoacid_deletions,
+      total_aminoacid_insertions,
+      nuc_to_aa_muts,
+      missing_genes,
+      present_genes,
+      warnings,
+      aa_insertions,
+      frame_shifts,
+      total_frame_shifts,
+      unknown_aa_ranges,
+      total_unknown_aa,
+      aa_alignment_ranges,
+      aa_unsequenced_ranges,
+    }
+  } else {
+    NextcladeResultWithAa::default()
+  };
+
+  let NextcladeResultWithGraph {
+    clade,
+    private_nuc_mutations,
+    private_aa_mutations,
+    phenotype_values,
+    divergence,
+    custom_node_attributes,
+    nearest_node_id,
+    nearest_nodes,
+  } = if let Some(graph) = graph {
+    let nearest_node_candidates = graph_find_nearest_nodes(graph, &substitutions, &missing, &alignment_range)?;
+    let nearest_node_key = nearest_node_candidates[0].node_key;
+    let nearest_node = graph.get_node(nearest_node_key)?.payload();
 
-  let nearest_nodes = include_nearest_node_info.then_some(
-    nearest_node_candidates
+    let nearest_nodes = params.general.include_nearest_node_info.then_some(
+      nearest_node_candidates
     .iter()
     // Choose all nodes with distance equal to the distance of the nearest node
     .filter(|n| n.distance == nearest_node_candidates[0].distance)
     .map(|n| Ok(graph.get_node(n.node_key)?.payload().name.clone()))
     .collect::<Result<Vec<String>, Report>>()?,
-  );
+    );
 
-  let clade = nearest_node.clade();
+    let clade = nearest_node.clade();
 
-  let clade_node_attr_keys = graph.data.meta.clade_node_attr_descs();
-  let clade_node_attrs = nearest_node.get_clade_node_attrs(clade_node_attr_keys);
+    let clade_node_attr_keys = graph.data.meta.clade_node_attr_descs();
+    let clade_node_attrs = nearest_node.get_clade_node_attrs(clade_node_attr_keys);
 
-  let private_nuc_mutations = find_private_nuc_mutations(
-    nearest_node,
-    &substitutions,
-    &deletions,
-    &missing,
-    &alignment_range,
-    ref_seq,
-    &non_acgtns,
-    virus_properties,
-  );
-
-  let GatherAaAlignmentRangesResult {
-    aa_alignment_ranges,
-    aa_unsequenced_ranges,
-  } = gather_aa_alignment_ranges(&translation, gene_map);
-
-  let private_aa_mutations = find_private_aa_mutations(
-    nearest_node,
-    &aa_substitutions,
-    &aa_deletions,
-    &unknown_aa_ranges,
-    &aa_unsequenced_ranges,
-    ref_peptides,
-    gene_map,
-  );
-  let parent_div = nearest_node.node_attrs.div.unwrap_or(0.0);
-  let divergence = parent_div
-    + calculate_branch_length(
-      &private_nuc_mutations.private_substitutions,
-      graph.data.tmp.divergence_units,
-      ref_seq.len(),
+    let private_nuc_mutations = find_private_nuc_mutations(
+      nearest_node,
+      &substitutions,
+      &deletions,
+      &missing,
+      &alignment_range,
+      ref_seq,
+      &non_acgtns,
+      virus_properties,
     );
 
-  let total_aligned_nucs = alignment_range.len();
-  let total_covered_nucs = total_aligned_nucs - total_missing - total_non_acgtns;
-  let coverage = total_covered_nucs as f64 / ref_seq.len() as f64;
+    let private_aa_mutations = find_private_aa_mutations(
+      nearest_node,
+      &aa_substitutions,
+      &aa_deletions,
+      &unknown_aa_ranges,
+      &aa_unsequenced_ranges,
+      ref_translation,
+      gene_map,
+    );
+    let parent_div = nearest_node.node_attrs.div.unwrap_or(0.0);
+    let masked_ranges = graph.data.meta.placement_mask_ranges();
+    let divergence = parent_div
+      + calculate_branch_length(
+        &private_nuc_mutations.private_substitutions,
+        masked_ranges,
+        graph.data.tmp.divergence_units,
+        ref_seq.len(),
+      );
 
-  let phenotype_values = virus_properties.phenotype_data.as_ref().map(|phenotype_data| {
-    phenotype_data
-      .iter()
-      .filter_map(|phenotype_data| {
-        let PhenotypeData { name, gene, ignore, .. } = phenotype_data;
-        if ignore.clades.contains(&clade) {
-          return None;
-        }
-        let phenotype = calculate_phenotype(phenotype_data, &aa_substitutions);
-        Some(PhenotypeValue {
-          name: name.clone(),
-          gene: gene.clone(),
-          value: phenotype,
+    let phenotype_values = virus_properties.phenotype_data.as_ref().map(|phenotype_data| {
+      phenotype_data
+        .iter()
+        .filter_map(|phenotype_data| {
+          let PhenotypeData { name, gene, ignore, .. } = phenotype_data;
+          if ignore.clades.contains(&clade) {
+            return None;
+          }
+          let phenotype = calculate_phenotype(phenotype_data, &aa_substitutions);
+          Some(PhenotypeValue {
+            name: name.clone(),
+            gene: gene.clone(),
+            value: phenotype,
+          })
         })
-      })
-      .collect_vec()
-  });
+        .collect_vec()
+    });
+
+    NextcladeResultWithGraph {
+      clade,
+      private_nuc_mutations,
+      private_aa_mutations,
+      phenotype_values,
+      divergence,
+      custom_node_attributes: clade_node_attrs,
+      nearest_node_id: nearest_node_key,
+      nearest_nodes,
+    }
+  } else {
+    NextcladeResultWithGraph::default()
+  };
 
   let aa_motifs = find_aa_motifs(&virus_properties.aa_motifs, &translation)?;
-  let aa_motifs_changes = find_aa_motifs_changes(aa_motifs_ref, &aa_motifs, ref_peptides, &translation)?;
-
-  let qc = qc_run(
-    &private_nuc_mutations,
-    &nucleotide_composition,
-    total_missing,
-    &translation,
-    &frame_shifts,
-    qc_config,
-  );
-
-  Ok((
-    stripped.qry_seq,
+  let aa_motifs_changes = find_aa_motifs_changes(aa_motifs_ref, &aa_motifs, ref_translation, &translation)?;
+
+  let qc = virus_properties
+    .qc
+    .as_ref()
+    .map(|qc_config| {
+      qc_run(
+        &private_nuc_mutations,
+        &nucleotide_composition,
+        total_missing,
+        &translation,
+        &frame_shifts,
+        qc_config,
+      )
+    })
+    .unwrap_or_default();
+
+  let is_reverse_complement = alignment.is_reverse_complement;
+
+  Ok(AnalysisOutput {
+    query: stripped.qry_seq,
     translation,
-    NextcladeOutputs {
+    analysis_result: NextcladeOutputs {
       index,
       seq_name: seq_name.to_owned(),
       substitutions,
@@ -251,21 +390,21 @@ pub fn nextclade_run_one(
       aa_unsequenced_ranges,
       pcr_primer_changes,
       total_pcr_primer_changes,
-      clade,
-      private_nuc_mutations,
-      private_aa_mutations,
       warnings,
       missing_genes,
-      divergence,
       coverage,
-      phenotype_values,
       aa_motifs,
       aa_motifs_changes,
       qc,
-      custom_node_attributes: clade_node_attrs,
-      nearest_node_id: nearest_node_key,
+      clade,
+      private_nuc_mutations,
+      private_aa_mutations,
+      phenotype_values,
+      divergence,
+      custom_node_attributes,
+      nearest_node_id,
       nearest_nodes,
       is_reverse_complement,
     },
-  ))
+  })
 }
diff --git a/packages_rs/nextclade/src/run/nextclade_wasm.rs b/packages_rs/nextclade/src/run/nextclade_wasm.rs
index c44c089c4..0873e8537 100644
--- a/packages_rs/nextclade/src/run/nextclade_wasm.rs
+++ b/packages_rs/nextclade/src/run/nextclade_wasm.rs
@@ -1,11 +1,9 @@
-use crate::align::gap_open::{get_gap_open_close_scores_codon_aware, get_gap_open_close_scores_flat};
-use crate::align::params::AlignPairwiseParams;
+use crate::align::gap_open::{get_gap_open_close_scores_codon_aware, get_gap_open_close_scores_flat, GapScoreMap};
 use crate::align::seed_match2::CodonSpacedIndex;
 use crate::alphabet::letter::{serde_deserialize_seq, serde_serialize_seq};
-use crate::alphabet::nuc::{to_nuc_seq, Nuc};
+use crate::alphabet::nuc::{to_nuc_seq, to_nuc_seq_replacing, Nuc};
 use crate::analyze::find_aa_motifs::find_aa_motifs;
 use crate::analyze::find_aa_motifs_changes::AaMotifsMap;
-use crate::analyze::pcr_primers::PcrPrimer;
 use crate::analyze::phenotype::get_phenotype_attr_descs;
 use crate::analyze::virus_properties::{AaMotifsDesc, PhenotypeAttrDesc, VirusProperties};
 use crate::gene::gene_map::GeneMap;
@@ -13,49 +11,52 @@ use crate::graph::graph::{convert_auspice_tree_to_graph, convert_graph_to_auspic
 use crate::io::fasta::{read_one_fasta_str, FastaRecord};
 use crate::io::nextclade_csv::CsvColumnConfig;
 use crate::io::nwk_writer::convert_graph_to_nwk_string;
-use crate::qc::qc_config::QcConfig;
 use crate::run::nextclade_run_one::nextclade_run_one;
+use crate::run::params::{NextcladeInputParams, NextcladeInputParamsOptional};
 use crate::translate::translate_genes::Translation;
 use crate::translate::translate_genes_ref::translate_genes_ref;
-use crate::tree::params::TreeBuilderParams;
 use crate::tree::tree::{AuspiceGraph, AuspiceTree, CladeNodeAttrKeyDesc};
 use crate::tree::tree_builder::graph_attach_new_nodes_in_place;
 use crate::tree::tree_preprocess::graph_preprocess_in_place;
 use crate::types::outputs::NextcladeOutputs;
-use crate::utils::error::report_to_string;
 use eyre::{Report, WrapErr};
+use itertools::Itertools;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
 use std::str::FromStr;
 
 #[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct NextcladeParams {
   #[schemars(with = "String")]
-  pub ref_seq: Vec<Nuc>,
+  pub ref_record: FastaRecord,
   pub gene_map: GeneMap,
-  pub tree: AuspiceTree,
-  pub qc_config: QcConfig,
+  pub tree: Option<AuspiceTree>,
   pub virus_properties: VirusProperties,
 }
 
 impl NextcladeParams {
-  pub fn from_raw(raw: &NextcladeParamsRaw) -> Result<Self, Report> {
-    let ref_seq = {
-      let ref_record = read_one_fasta_str(&raw.ref_seq).wrap_err("When parsing reference sequence")?;
-      to_nuc_seq(&ref_record.seq).wrap_err("When converting reference sequence")?
-    };
-    let tree = AuspiceTree::from_str(&raw.tree).wrap_err("When parsing reference tree Auspice JSON v2")?;
-    let gene_map = GeneMap::from_str(&raw.gene_map).wrap_err("When parsing gene map")?;
-    let qc_config = QcConfig::from_str(&raw.qc_config).wrap_err("When parsing QC config JSON")?;
+  pub fn from_raw(raw: NextcladeParamsRaw) -> Result<Self, Report> {
     let virus_properties =
       VirusProperties::from_str(&raw.virus_properties).wrap_err("When parsing virus properties JSON")?;
 
+    let ref_record = read_one_fasta_str(&raw.ref_seq).wrap_err("When parsing reference sequence")?;
+
+    let tree = raw
+      .tree
+      .map(|tree| AuspiceTree::from_str(tree).wrap_err("When parsing reference tree Auspice JSON v2"))
+      .transpose()?;
+
+    let gene_map = raw.gene_map.map_or_else(
+      || Ok(GeneMap::new()), // If genome annotation is not provided, use an empty one
+      |gene_map| GeneMap::from_str(gene_map).wrap_err("When parsing genome annotation"),
+    )?;
+
     Ok(Self {
-      ref_seq,
+      ref_record,
       gene_map,
       tree,
-      qc_config,
       virus_properties,
     })
   }
@@ -66,9 +67,8 @@ impl NextcladeParams {
 pub struct NextcladeParamsRaw {
   #[schemars(with = "String")]
   pub ref_seq: String,
-  pub gene_map: String,
-  pub tree: String,
-  pub qc_config: String,
+  pub gene_map: Option<String>,
+  pub tree: Option<String>,
   pub virus_properties: String,
 }
 
@@ -80,15 +80,16 @@ pub struct AnalysisInput {
   pub qry_seq_str: String,
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
+#[derive(Clone, Debug, Serialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
-pub struct AnalysisInitialData {
-  gene_map: GeneMap,
-  genome_size: usize,
-  clade_node_attr_key_descs: Vec<CladeNodeAttrKeyDesc>,
-  phenotype_attr_descs: Vec<PhenotypeAttrDesc>,
-  aa_motifs_descs: Vec<AaMotifsDesc>,
-  csv_column_config_default: CsvColumnConfig,
+pub struct AnalysisInitialData<'a> {
+  pub genome_size: usize,
+  pub gene_map: GeneMap,
+  pub clade_node_attr_key_descs: &'a [CladeNodeAttrKeyDesc],
+  pub phenotype_attr_descs: &'a [PhenotypeAttrDesc],
+  pub aa_motifs_descs: &'a [AaMotifsDesc],
+  pub aa_motif_keys: &'a [String],
+  pub csv_column_config_default: CsvColumnConfig,
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
@@ -97,9 +98,9 @@ pub struct AnalysisOutput {
   #[schemars(with = "String")]
   #[serde(serialize_with = "serde_serialize_seq")]
   #[serde(deserialize_with = "serde_deserialize_seq")]
-  query: Vec<Nuc>,
-  translation: Translation,
-  analysis_result: NextcladeOutputs,
+  pub query: Vec<Nuc>,
+  pub translation: Translation,
+  pub analysis_result: NextcladeOutputs,
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
@@ -112,153 +113,157 @@ pub struct NextcladeResult {
 }
 
 pub struct Nextclade {
-  ref_seq: Vec<Nuc>,
-  seed_index: CodonSpacedIndex,
-  ref_translation: Translation,
-  aa_motifs_ref: AaMotifsMap,
-  gene_map: GeneMap,
-  primers: Vec<PcrPrimer>,
-  graph: AuspiceGraph,
-  qc_config: QcConfig,
-  virus_properties: VirusProperties,
-  gap_open_close_nuc: Vec<i32>,
-  gap_open_close_aa: Vec<i32>,
-  clade_node_attrs: Vec<CladeNodeAttrKeyDesc>,
-  phenotype_attr_descs: Vec<PhenotypeAttrDesc>,
-  aa_motifs_descs: Vec<AaMotifsDesc>,
-  alignment_params: AlignPairwiseParams,
-  tree_builder_params: TreeBuilderParams,
-  include_nearest_node_info: bool,
+  // Always present
+  pub ref_record: FastaRecord,
+  pub ref_seq: Vec<Nuc>,
+  pub seed_index: CodonSpacedIndex,
+  pub gap_open_close_nuc: Vec<i32>,
+  pub virus_properties: VirusProperties,
+  pub params: NextcladeInputParams,
+
+  // If genome annotation is provided
+  pub gene_map: GeneMap,
+  pub gap_open_close_aa: Vec<i32>,
+  pub ref_translation: Translation,
+  pub aa_motifs_ref: AaMotifsMap,
+  pub aa_motifs_descs: Vec<AaMotifsDesc>,
+  pub aa_motifs_keys: Vec<String>,
+
+  // If ref tree is provided
+  pub graph: Option<AuspiceGraph>,
+  pub clade_attr_descs: Vec<CladeNodeAttrKeyDesc>,
+  pub phenotype_attr_descs: Vec<PhenotypeAttrDesc>,
+}
+
+pub struct InitialStateWithAa {
+  pub gap_open_close_nuc: GapScoreMap,
+  pub gap_open_close_aa: GapScoreMap,
+  pub ref_translation: Translation,
+  pub aa_motifs_ref: AaMotifsMap,
+}
+
+pub struct NextcladeStateWithGraph {
+  pub graph: AuspiceGraph,
+  pub clade_node_attrs: Vec<CladeNodeAttrKeyDesc>,
+  pub phenotype_attr_descs: Vec<PhenotypeAttrDesc>,
 }
 
 impl Nextclade {
-  pub fn new(params: NextcladeParams) -> Result<Self, Report> {
+  pub fn new(inputs: NextcladeParams, params: &NextcladeInputParamsOptional) -> Result<Self, Report> {
     let NextcladeParams {
-      ref_seq,
+      ref_record,
       gene_map,
       tree,
-      qc_config,
       virus_properties,
-    } = params;
+    } = inputs;
 
+    let params = NextcladeInputParams::from_optional(params, &virus_properties);
+    let ref_seq = to_nuc_seq(&ref_record.seq).wrap_err("When converting reference sequence")?;
     let seed_index = CodonSpacedIndex::from_sequence(&ref_seq);
 
-    let alignment_params = {
-      let mut alignment_params = AlignPairwiseParams::default();
-      // Merge alignment params coming from virus_properties into alignment_params
-      if let Some(alignment_params_from_file) = &virus_properties.alignment_params {
-        alignment_params.merge_opt(alignment_params_from_file.clone());
-      }
-      alignment_params
-    };
+    // If genome annotation is present, calculate AA-related parameters
+    let InitialStateWithAa {
+      gap_open_close_nuc,
+      gap_open_close_aa,
+      ref_translation,
+      aa_motifs_ref,
+    } = if !gene_map.is_empty() {
+      let gap_open_close_nuc = get_gap_open_close_scores_codon_aware(&ref_seq, &gene_map, &params.alignment);
+      let gap_open_close_aa = get_gap_open_close_scores_flat(&ref_seq, &params.alignment);
 
-    let tree_builder_params = {
-      let mut tree_builder_params = TreeBuilderParams::default();
-      // Merge alignment params coming from virus_properties into alignment_params
-      if let Some(tree_builder_params_from_file) = &virus_properties.tree_builder_params {
-        tree_builder_params.merge_opt(tree_builder_params_from_file.clone());
+      let ref_translation =
+        translate_genes_ref(&ref_seq, &gene_map, &params.alignment).wrap_err("When translating reference sequence")?;
+
+      let aa_motifs_ref = find_aa_motifs(&virus_properties.aa_motifs, &ref_translation)
+        .wrap_err("When searching AA motifs in reference translation")?;
+
+      InitialStateWithAa {
+        gap_open_close_nuc,
+        gap_open_close_aa,
+        ref_translation,
+        aa_motifs_ref,
+      }
+    } else {
+      let gap_open_close = get_gap_open_close_scores_flat(&ref_seq, &params.alignment);
+      InitialStateWithAa {
+        gap_open_close_nuc: gap_open_close.clone(),
+        gap_open_close_aa: gap_open_close,
+        ref_translation: Translation::default(),
+        aa_motifs_ref: BTreeMap::default(),
       }
-      tree_builder_params
     };
 
-    let gap_open_close_nuc = get_gap_open_close_scores_codon_aware(&ref_seq, &gene_map, &alignment_params);
+    let graph = tree
+      .map(|tree| -> Result<AuspiceGraph, Report> {
+        let mut graph =
+          convert_auspice_tree_to_graph(tree).wrap_err("When converting Auspice tree to Nextclade graph")?;
 
-    let gap_open_close_aa = get_gap_open_close_scores_flat(&ref_seq, &alignment_params);
+        graph_preprocess_in_place(&mut graph, &ref_seq, &ref_translation)
+          .wrap_err("When preprocessing Nextclade graph")?;
 
-    let ref_translation =
-      translate_genes_ref(&ref_seq, &gene_map, &alignment_params).wrap_err("When translating reference genes")?;
+        Ok(graph)
+      })
+      .transpose()?;
 
-    let aa_motifs_ref = find_aa_motifs(&virus_properties.aa_motifs, &ref_translation)?;
-
-    let mut graph = convert_auspice_tree_to_graph(tree)?;
-    graph_preprocess_in_place(&mut graph, &ref_seq, &ref_translation)?;
-    let clade_node_attrs = graph.data.meta.clade_node_attr_descs().to_vec();
+    let clade_attr_descs = graph
+      .as_ref()
+      .map(|graph| graph.data.meta.clade_node_attr_descs().to_vec())
+      .unwrap_or_default();
 
     let phenotype_attr_descs = get_phenotype_attr_descs(&virus_properties);
 
     let aa_motifs_descs = virus_properties.aa_motifs.clone();
+    let aa_motifs_keys = aa_motifs_descs.iter().map(|desc| desc.name.clone()).collect_vec();
 
     Ok(Self {
+      ref_record,
       ref_seq,
       seed_index,
+      gap_open_close_nuc,
+      virus_properties,
+      params,
+      gene_map,
+      gap_open_close_aa,
       ref_translation,
       aa_motifs_ref,
-      gene_map,
-      primers: vec![], // FIXME
+      aa_motifs_descs,
+      aa_motifs_keys,
       graph,
-      qc_config,
-      virus_properties,
-      gap_open_close_nuc,
-      gap_open_close_aa,
-      clade_node_attrs,
+      clade_attr_descs,
       phenotype_attr_descs,
-      aa_motifs_descs,
-      alignment_params,
-      tree_builder_params,
-      include_nearest_node_info: false, // Never emit nearest node info in web, to reduce output size
     })
   }
 
-  #[inline]
-  pub fn get_initial_data(&self) -> Result<AnalysisInitialData, Report> {
-    Ok(AnalysisInitialData {
+  pub fn get_initial_data(&self) -> AnalysisInitialData {
+    AnalysisInitialData {
       gene_map: self.gene_map.clone(),
       genome_size: self.ref_seq.len(),
-      clade_node_attr_key_descs: self.clade_node_attrs.clone(),
-      phenotype_attr_descs: self.phenotype_attr_descs.clone(),
-      aa_motifs_descs: self.aa_motifs_descs.clone(),
+      clade_node_attr_key_descs: &self.clade_attr_descs,
+      phenotype_attr_descs: &self.phenotype_attr_descs,
+      aa_motifs_descs: &self.aa_motifs_descs,
+      aa_motif_keys: &self.aa_motifs_keys,
       csv_column_config_default: CsvColumnConfig::default(),
-    })
+    }
   }
 
-  pub fn run(&mut self, input: &FastaRecord) -> Result<NextcladeResult, Report> {
-    let qry_seq = to_nuc_seq(&input.seq).wrap_err("When converting query sequence")?;
-
-    match nextclade_run_one(
-      input.index,
-      &input.seq_name,
-      &qry_seq,
-      &self.ref_seq,
-      &self.seed_index,
-      &self.ref_translation,
-      &self.aa_motifs_ref,
-      &self.gene_map,
-      &self.primers,
-      &self.graph,
-      &self.qc_config,
-      &self.virus_properties,
-      &self.gap_open_close_nuc,
-      &self.gap_open_close_aa,
-      &self.alignment_params,
-      self.include_nearest_node_info,
-    ) {
-      Ok((query, translation, analysis_result)) => Ok(NextcladeResult {
-        index: input.index,
-        seq_name: input.seq_name.clone(),
-        result: Some(AnalysisOutput {
-          query,
-          translation,
-          analysis_result,
-        }),
-        error: None,
-      }),
-      Err(err) => {
-        let error = report_to_string(&err);
-        Ok(NextcladeResult {
-          index: input.index,
-          seq_name: input.seq_name.clone(),
-          result: None,
-          error: Some(error),
-        })
-      }
+  pub fn run(&self, input: &FastaRecord) -> Result<AnalysisOutput, Report> {
+    if self.params.general.replace_unknown {
+      Ok(to_nuc_seq_replacing(&input.seq))
+    } else {
+      to_nuc_seq(&input.seq)
     }
+    .and_then(|qry_seq| nextclade_run_one(input.index, &input.seq_name, &qry_seq, self))
   }
 
-  pub fn get_output_trees(&mut self, results: Vec<NextcladeOutputs>) -> Result<OutputTrees, Report> {
-    graph_attach_new_nodes_in_place(&mut self.graph, results, self.ref_seq.len(), &self.tree_builder_params)?;
-    let auspice = convert_graph_to_auspice_tree(&self.graph)?;
-    let nwk = convert_graph_to_nwk_string(&self.graph)?;
-    Ok(OutputTrees { auspice, nwk })
+  pub fn get_output_trees(&mut self, results: Vec<NextcladeOutputs>) -> Result<Option<OutputTrees>, Report> {
+    if let Some(graph) = &mut self.graph {
+      graph_attach_new_nodes_in_place(graph, results, self.ref_seq.len(),  &self.params.tree_builder)?;
+      let auspice = convert_graph_to_auspice_tree(graph)?;
+      let nwk = convert_graph_to_nwk_string(graph)?;
+      Ok(Some(OutputTrees { auspice, nwk }))
+    } else {
+      Ok(None)
+    }
   }
 }
 
diff --git a/packages_rs/nextclade/src/run/params.rs b/packages_rs/nextclade/src/run/params.rs
new file mode 100644
index 000000000..2a9738e20
--- /dev/null
+++ b/packages_rs/nextclade/src/run/params.rs
@@ -0,0 +1,81 @@
+use crate::align::params::{AlignPairwiseParams, AlignPairwiseParamsOptional};
+use crate::analyze::virus_properties::VirusProperties;
+use crate::run::params_general::{NextcladeGeneralParams, NextcladeGeneralParamsOptional};
+use crate::tree::params::{TreeBuilderParams, TreeBuilderParamsOptional};
+use clap::Parser;
+use serde::{Deserialize, Serialize};
+
+#[derive(Parser, Debug, Default, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct NextcladeInputParamsOptional {
+  #[clap(flatten, next_help_heading = "General parameters")]
+  pub general: Option<NextcladeGeneralParamsOptional>,
+
+  #[clap(flatten, next_help_heading = "Phylogenetic tree parameters")]
+  pub tree_builder: Option<TreeBuilderParamsOptional>,
+
+  #[clap(flatten, next_help_heading = "Alignment parameters")]
+  pub alignment: Option<AlignPairwiseParamsOptional>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct NextcladeInputParams {
+  pub general: NextcladeGeneralParams,
+  pub tree_builder: TreeBuilderParams,
+  pub alignment: AlignPairwiseParams,
+}
+
+impl NextcladeInputParams {
+  /// Resolves the effective parameters by stacking three layers, in order of increasing priority:
+  ///
+  ///  1. the `Default` value of the parameter group,
+  ///  2. the matching `*_params` field of `virus_properties`, if it is `Some`,
+  ///  3. the matching field of `params`, if it is `Some`.
+  ///
+  /// Each present layer is cloned and applied with `merge_opt()` on top of what the previous layers produced.
+  /// Absent (`None`) layers are skipped. The general, alignment and tree builder parameter groups are
+  /// resolved independently of each other.
+  pub fn from_optional(params: &NextcladeInputParamsOptional, virus_properties: &VirusProperties) -> Self {
+    // FIXME: this code is repetitive and error prone
+
+    // `Option::iter()` yields the contained value only when it is `Some`, so chaining the two optional layers
+    // visits the present ones in priority order: `virus_properties` first, then `params`.
+
+    // General parameters
+    let general = virus_properties
+      .general_params
+      .iter()
+      .chain(params.general.iter())
+      .fold(NextcladeGeneralParams::default(), |mut merged, layer| {
+        merged.merge_opt(layer.clone());
+        merged
+      });
+
+    // Alignment parameters
+    let alignment = virus_properties
+      .alignment_params
+      .iter()
+      .chain(params.alignment.iter())
+      .fold(AlignPairwiseParams::default(), |mut merged, layer| {
+        merged.merge_opt(layer.clone());
+        merged
+      });
+
+    // Tree builder parameters
+    let tree_builder = virus_properties
+      .tree_builder_params
+      .iter()
+      .chain(params.tree_builder.iter())
+      .fold(TreeBuilderParams::default(), |mut merged, layer| {
+        merged.merge_opt(layer.clone());
+        merged
+      });
+
+    Self {
+      general,
+      tree_builder,
+      alignment,
+    }
+  }
+}
diff --git a/packages_rs/nextclade/src/run/params_general.rs b/packages_rs/nextclade/src/run/params_general.rs
new file mode 100644
index 000000000..fb3d19805
--- /dev/null
+++ b/packages_rs/nextclade/src/run/params_general.rs
@@ -0,0 +1,54 @@
+use clap::Parser;
+use optfield::optfield;
+use serde::{Deserialize, Serialize};
+
+#[allow(clippy::struct_excessive_bools)]
+#[optfield(pub NextcladeGeneralParamsOptional, attrs, doc, field_attrs, field_doc, merge_fn = pub)]
+#[derive(Parser, Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct NextcladeGeneralParams {
+  /// Whether to include aligned reference nucleotide sequence into output nucleotide sequence FASTA file and reference peptides into output peptide FASTA files.
+  #[clap(long)]
+  #[clap(num_args=0..=1, default_missing_value = "true")]
+  pub include_reference: bool,
+
+  /// Whether to include the list of nearest nodes to the outputs
+  #[clap(long)]
+  #[clap(num_args=0..=1, default_missing_value = "true")]
+  pub include_nearest_node_info: bool,
+
+  /// Emit output sequences in-order.
+  ///
+  /// With this flag the program will wait for results from the previous sequences to be written to the output files before writing the results of the next sequences, preserving the same order as in the input file. Due to variable sequence processing times, this might introduce unnecessary waiting times, but ensures that the resulting sequences are written in the same order as they occur in the inputs (except for sequences which have errors).
+  /// By default, without this flag, processing might happen out of order, which is faster, due to the elimination of waiting, but might also lead to results written out of order - the order of results is not specified and depends on thread scheduling and processing times of individual sequences.
+  ///
+  /// This option is only relevant when `--jobs` is greater than 1 or is omitted.
+  ///
+  /// Note: the sequences which trigger errors during processing will be omitted from outputs, regardless of this flag.
+  #[clap(long)]
+  #[clap(num_args=0..=1, default_missing_value = "true")]
+  pub in_order: bool,
+
+  /// Replace unknown nucleotide characters with 'N'
+  ///
+  /// By default, the sequences containing unknown nucleotide characters are skipped with a warning - they
+  /// are not analyzed and not included into results. If this flag is provided, then before the alignment,
+  /// all unknown characters are replaced with 'N'. This replacement allows to analyze these sequences.
+  ///
+  /// The following characters are considered known:  '-', 'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M', 'N', 'R', 'S', 'T', 'V', 'W', 'Y'
+  #[clap(long)]
+  #[clap(num_args=0..=1, default_missing_value = "true")]
+  pub replace_unknown: bool,
+}
+
+#[allow(clippy::derivable_impls)]
+impl Default for NextcladeGeneralParams {
+  fn default() -> Self { // every flag is off unless explicitly enabled
+    Self {
+      include_reference: false,
+      include_nearest_node_info: false,
+      in_order: false,
+      replace_unknown: false,
+    }
+  }
+}
diff --git a/packages_rs/nextclade/src/sort/minimizer_index.rs b/packages_rs/nextclade/src/sort/minimizer_index.rs
new file mode 100644
index 000000000..291b88ce8
--- /dev/null
+++ b/packages_rs/nextclade/src/sort/minimizer_index.rs
@@ -0,0 +1,126 @@
+use crate::io::fs::read_file_to_string;
+use crate::io::json::json_parse;
+use crate::io::schema_version::{SchemaVersion, SchemaVersionParams};
+use eyre::{Report, WrapErr};
+use log::warn;
+use schemars::JsonSchema;
+use serde::ser::SerializeMap;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::collections::BTreeMap;
+use std::path::Path;
+use std::str::FromStr;
+
+pub const MINIMIZER_INDEX_SCHEMA_VERSION_FROM: &str = "3.0.0";
+pub const MINIMIZER_INDEX_SCHEMA_VERSION_TO: &str = "3.0.0";
+pub const MINIMIZER_INDEX_ALGO_VERSION: &str = "1";
+
+pub type MinimizerMap = BTreeMap<u64, String>;
+
+/// Contents of a minimizer index JSON file, used for matching query sequences against a set of references
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerIndexJson {
+  #[serde(rename = "schemaVersion")]
+  pub schema_version: String,
+
+  pub version: String, // compared against `MINIMIZER_INDEX_ALGO_VERSION` when parsing
+
+  pub params: MinimizerIndexParams,
+
+  #[schemars(with = "BTreeMap<String, String>")]
+  #[serde(serialize_with = "serde_serialize_minimizers")]
+  #[serde(deserialize_with = "serde_deserialize_minimizers")]
+  #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
+  pub minimizers: MinimizerMap, // hash -> string which the search reads as one digit per reference
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub references: Vec<MinimizerIndexRefInfo>,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub normalization: Vec<f64>, // per-reference score multipliers, indexed like `references`
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+/// Serde serializer for `MinimizerMap`: emits a map whose `u64` keys are written as decimal strings
+pub fn serde_serialize_minimizers<S: Serializer>(minimizers: &MinimizerMap, s: S) -> Result<S::Ok, S::Error> {
+  let mut map = s.serialize_map(Some(minimizers.len()))?;
+  for (k, v) in minimizers {
+    map.serialize_entry(&k.to_string(), v)?; // values are already strings, no copy needed
+  }
+  map.end()
+}
+
+/// Serde deserializer for `MinimizerMap`: reads a string-keyed map and parses each key as `u64`, failing on bad keys
+pub fn serde_deserialize_minimizers<'de, D: Deserializer<'de>>(deserializer: D) -> Result<MinimizerMap, D::Error> {
+  let map = BTreeMap::<String, String>::deserialize(deserializer)?;
+
+  map
+    .into_iter()
+    .map(|(k, v)| match u64::from_str(&k) {
+      Ok(hash) => Ok((hash, v)),
+      Err(err) => Err(<D::Error as serde::de::Error>::custom(format!("Invalid minimizer key '{k}': {err}"))),
+    })
+    .collect()
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerIndexParams {
+  pub k: i64,
+
+  pub cutoff: i64,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerIndexRefInfo {
+  pub length: i64,
+  pub name: String,
+  pub n_minimizers: i64,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct VersionCheck {
+  pub version: String,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+impl MinimizerIndexJson {
+  /// Reads the file at `filepath` and parses its contents with `from_str()`
+  pub fn from_path(filepath: impl AsRef<Path>) -> Result<Self, Report> {
+    let filepath = filepath.as_ref();
+    let data =
+      read_file_to_string(filepath).wrap_err_with(|| format!("When reading minimizer index file: {filepath:#?}"))?;
+    Self::from_str(data)
+  }
+
+  /// Parses a minimizer index from JSON text. An algorithm version newer than supported only produces a warning.
+  pub fn from_str(s: impl AsRef<str>) -> Result<Self, Report> {
+    let s = s.as_ref();
+    let schema_params = SchemaVersionParams {
+      name: "minimizer_index.json",
+      ver_from: Some(MINIMIZER_INDEX_SCHEMA_VERSION_FROM),
+      ver_to: Some(MINIMIZER_INDEX_SCHEMA_VERSION_TO),
+    };
+    SchemaVersion::check_warn(s, &schema_params);
+    let VersionCheck { version, .. } = json_parse(s)?;
+    // Compare numerically when both versions parse as integers: as strings, "10" would sort before "2"
+    let numeric = version.parse::<u64>().ok().zip(MINIMIZER_INDEX_ALGO_VERSION.parse::<u64>().ok());
+    if numeric.map_or(version.as_str() > MINIMIZER_INDEX_ALGO_VERSION, |(found, supported)| found > supported) {
+      warn!("Version of the minimizer index data ({version}) is greater than maximum supported by this version of Nextclade ({MINIMIZER_INDEX_ALGO_VERSION}). This may lead to errors or incorrect results. Please try to update your version of Nextclade and/or contact dataset maintainers for more details.");
+    }
+
+    json_parse(s)
+  }
+}
diff --git a/packages_rs/nextclade/src/sort/minimizer_search.rs b/packages_rs/nextclade/src/sort/minimizer_search.rs
new file mode 100644
index 000000000..bc01dbf1b
--- /dev/null
+++ b/packages_rs/nextclade/src/sort/minimizer_search.rs
@@ -0,0 +1,146 @@
+use crate::io::fasta::FastaRecord;
+use crate::sort::minimizer_index::{MinimizerIndexJson, MinimizerIndexParams};
+use crate::sort::params::NextcladeSeqSortParams;
+use eyre::Report;
+use itertools::{izip, Itertools};
+use ordered_float::OrderedFloat;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerSearchDatasetResult {
+  pub name: String,
+  pub length: i64,
+  pub n_hits: u64,
+  pub score: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerSearchResult {
+  pub total_hits: u64,
+  pub max_score: f64,
+  pub datasets: Vec<MinimizerSearchDatasetResult>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MinimizerSearchRecord {
+  pub fasta_record: FastaRecord,
+  pub result: MinimizerSearchResult,
+}
+
+/// Counts index hits of a query sequence per reference and scores them. Malformed index data yields an error.
+pub fn run_minimizer_search(
+  fasta_record: &FastaRecord,
+  index: &MinimizerIndexJson,
+  search_params: &NextcladeSeqSortParams,
+) -> Result<MinimizerSearchResult, Report> {
+  let n_refs = index.references.len();
+  eyre::ensure!(index.normalization.len() >= n_refs, "Minimizer index has fewer normalization values than references");
+
+  let mut hit_counts = vec![0_u64; n_refs];
+  for m in get_ref_search_minimizers(fasta_record, &index.params) {
+    if let Some(mz) = index.minimizers.get(&m) {
+      // Each entry is read as one decimal digit per reference: the number of hits to add for that reference
+      for (i, count) in hit_counts.iter_mut().enumerate() {
+        let digit = mz.as_bytes().get(i).and_then(|byte| char::from(*byte).to_digit(10));
+        let digit = digit.ok_or_else(|| eyre::eyre!("Malformed minimizer index entry {} at reference {}", m, i))?;
+        *count += u64::from(digit);
+      }
+    }
+  }
+
+  // we expect hits to be proportional to the length of the sequence and the number of minimizers per reference
+  let seq_len = fasta_record.seq.len() as f64;
+  let scores = izip!(&hit_counts, &index.normalization).map(|(n, norm)| *n as f64 * norm / seq_len).collect_vec();
+
+  let max_score = scores.iter().copied().fold(0.0, f64::max);
+  let total_hits: u64 = hit_counts.iter().sum();
+
+  let datasets = izip!(&index.references, hit_counts, scores)
+    .filter_map(|(ref_info, n_hits, score)| {
+      (n_hits >= search_params.min_hits && score >= search_params.min_score).then_some(MinimizerSearchDatasetResult {
+        name: ref_info.name.clone(),
+        length: ref_info.length,
+        n_hits,
+        score,
+      })
+    })
+    .sorted_by_key(|result| -OrderedFloat(result.score))
+    .collect_vec();
+
+  Ok(MinimizerSearchResult {
+    total_hits,
+    max_score,
+    datasets,
+  })
+}
+
+/// Mixes `x` through a fixed sequence of shift, add and xor steps, keeping the state within the lowest 32 bits
+const fn invertible_hash(x: u64) -> u64 {
+  const MASK: u64 = (1 << 32) - 1;
+  let h = (!x).wrapping_add(x << 21) & MASK;
+  let h = h ^ (h >> 24);
+  let h = (h + (h << 3) + (h << 8)) & MASK;
+  let h = h ^ (h >> 14);
+  let h = (h + (h << 2) + (h << 4)) & MASK;
+  let h = h ^ (h >> 28);
+  (h + (h << 31)) & MASK
+}
+
+/// Hashes a k-mer: packs 2 bits per considered nucleotide into an integer, then applies `invertible_hash()`.
+///
+/// Every third nucleotide (0-based positions 2, 5, 8, ...) is ignored entirely. If any of the remaining
+/// nucleotides is not one of `A`, `C`, `G`, `T`, the function returns `params.cutoff + 1` instead of a hash.
+fn get_hash(kmer: &[u8], params: &MinimizerIndexParams) -> u64 {
+  let cutoff = params.cutoff as u64;
+
+  let mut x = 0;
+  let mut j = 0;
+
+  // skip every third nucleotide to pick up conserved patterns
+  let considered = kmer.iter().enumerate().filter(|(i, _)| i % 3 != 2);
+  for (_, &nuc) in considered {
+    if !matches!(nuc, b'A' | b'C' | b'G' | b'T') {
+      return cutoff + 1; // break out of loop, return hash above cutoff
+    }
+
+    // Bit `j` is set for A and C, bit `j + 1` is set for A and T,
+    // so the 2-bit value stored at offset `j` is: A=3, T=2, C=1, G=0
+    if matches!(nuc, b'A' | b'C') {
+      x += 1 << j;
+    }
+    if matches!(nuc, b'A' | b'T') {
+      x += 1 << (j + 1);
+    }
+
+    j += 2;
+  }
+
+  invertible_hash(x)
+}
+
+/// Collects the distinct hashes below `params.cutoff` of all visited `params.k`-mers of a sequence, in order
+/// of first occurrence. The sequence is uppercased and stripped of `-` characters first.
+///
+/// NOTE(review): window starts run over `0..len - k`, so the k-mer starting at `len - k` is never visited.
+/// Presumably this must agree with how the index was generated - confirm before changing.
+pub fn get_ref_search_minimizers(seq: &FastaRecord, params: &MinimizerIndexParams) -> Vec<u64> {
+  let k = params.k as usize;
+  let cutoff = params.cutoff as u64;
+  let seq_str = preprocess_seq(&seq.seq);
+  let bytes = seq_str.as_bytes();
+  (0..bytes.len().saturating_sub(k))
+    .map(|start| get_hash(&bytes[start..start + k], params))
+    // accept only hashes below cutoff --> reduces the size of the index and the number of lookups
+    .filter(|mhash| *mhash < cutoff)
+    .unique()
+    .collect_vec()
+}
+
+fn preprocess_seq(seq: impl AsRef<str>) -> String {
+  seq.as_ref().chars().filter(|c| *c != '-').collect::<String>().to_uppercase() // drop gaps, then uppercase
+}
diff --git a/packages_rs/nextclade/src/sort/mod.rs b/packages_rs/nextclade/src/sort/mod.rs
new file mode 100644
index 000000000..c8b4c34db
--- /dev/null
+++ b/packages_rs/nextclade/src/sort/mod.rs
@@ -0,0 +1,3 @@
+pub mod minimizer_index;
+pub mod minimizer_search;
+pub mod params;
diff --git a/packages_rs/nextclade/src/sort/params.rs b/packages_rs/nextclade/src/sort/params.rs
new file mode 100644
index 000000000..a3a93f264
--- /dev/null
+++ b/packages_rs/nextclade/src/sort/params.rs
@@ -0,0 +1,28 @@
+use clap::Parser;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Parser, Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct NextcladeSeqSortParams {
+  /// Minimum value of the score being considered for a detection
+  #[clap(long)]
+  #[clap(default_value_t = NextcladeSeqSortParams::default().min_score)]
+  pub min_score: f64,
+
+  /// Minimum number of the index hits required for a detection
+  #[clap(long)]
+  #[clap(default_value_t = NextcladeSeqSortParams::default().min_hits)]
+  pub min_hits: u64,
+}
+
+#[allow(clippy::derivable_impls)]
+impl Default for NextcladeSeqSortParams {
+  fn default() -> Self { // these values also feed the clap `default_value_t` of the matching CLI arguments
+    Self {
+      min_score: 0.3,
+      min_hits: 10,
+    }
+  }
+}
diff --git a/packages_rs/nextclade/src/translate/translate_genes.rs b/packages_rs/nextclade/src/translate/translate_genes.rs
index 2679a2363..d8081a08d 100644
--- a/packages_rs/nextclade/src/translate/translate_genes.rs
+++ b/packages_rs/nextclade/src/translate/translate_genes.rs
@@ -9,10 +9,11 @@ use crate::analyze::count_gaps::GapCounts;
 use crate::coord::coord_map_global::CoordMapGlobal;
 use crate::coord::coord_map_local::CoordMapLocal;
 use crate::coord::position::PositionLike;
-use crate::coord::range::{AaRefRange, Range};
+use crate::coord::range::{AaRefRange, NucRefGlobalRange, Range};
 use crate::gene::cds::Cds;
 use crate::gene::gene::Gene;
 use crate::gene::gene_map::GeneMap;
+use crate::translate::aa_alignment_ranges::calculate_aa_alignment_ranges_in_place;
 use crate::translate::extract::extract_cds_from_aln;
 use crate::translate::frame_shifts_detect::frame_shifts_detect;
 use crate::translate::frame_shifts_translate::{frame_shifts_transform_coordinates, FrameShift};
@@ -28,7 +29,7 @@ use num_traits::clamp_max;
 use rayon::iter::Either;
 use serde::{Deserialize, Serialize};
 
-#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
+#[derive(Debug, Default, Clone, Serialize, Deserialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct Translation {
   pub genes: IndexMap<String, GeneTranslation>,
@@ -303,6 +304,7 @@ pub fn translate_genes(
   ref_peptides: &Translation,
   gene_map: &GeneMap,
   coord_map_global: &CoordMapGlobal,
+  global_alignment_range: &NucRefGlobalRange,
   gap_open_close_aa: &[i32],
   params: &AlignPairwiseParams,
 ) -> Result<Translation, Report> {
@@ -344,5 +346,10 @@ pub fn translate_genes(
     })
     .collect::<Result<IndexMap<String, GeneTranslation>, Report>>()?;
 
-  Ok(Translation { genes })
+  let mut translation = Translation { genes };
+
+  // FIXME: Avoid another loop
+  calculate_aa_alignment_ranges_in_place(global_alignment_range, &mut translation, gene_map)?;
+
+  Ok(translation)
 }
diff --git a/packages_rs/nextclade/src/tree/params.rs b/packages_rs/nextclade/src/tree/params.rs
index 4171ee034..d751d2324 100644
--- a/packages_rs/nextclade/src/tree/params.rs
+++ b/packages_rs/nextclade/src/tree/params.rs
@@ -14,6 +14,9 @@ pub struct TreeBuilderParams {
   #[clap(long)]
   #[clap(num_args=0..=1, default_missing_value = "true")]
   pub without_greedy_tree_builder: bool,
+
+  #[clap(long)]
+  pub masked_muts_weight: f64,
 }
 
 #[allow(clippy::derivable_impls)]
@@ -21,6 +24,7 @@ impl Default for TreeBuilderParams {
   fn default() -> Self {
     Self {
       without_greedy_tree_builder: false,
+      masked_muts_weight: 0.05,
     }
   }
 }
diff --git a/packages_rs/nextclade/src/tree/split_muts.rs b/packages_rs/nextclade/src/tree/split_muts.rs
index 50493824e..73942395a 100644
--- a/packages_rs/nextclade/src/tree/split_muts.rs
+++ b/packages_rs/nextclade/src/tree/split_muts.rs
@@ -6,7 +6,6 @@ use crate::coord::position::PositionLike;
 use crate::make_internal_error;
 use eyre::{Report, WrapErr};
 use itertools::{chain, Itertools};
-use regex::internal::Input;
 use std::collections::BTreeMap;
 use std::fmt::Display;
 use std::hash::Hash;
@@ -244,9 +243,9 @@ where
 pub fn difference_of_muts(left: &BranchMutations, right: &BranchMutations) -> Result<BranchMutations, Report> {
   Ok(BranchMutations {
     nuc_muts: difference(&left.nuc_muts, &right.nuc_muts)
-      .wrap_err("When calculating union of private nucleotide substitutions")?,
+      .wrap_err("When calculating difference of private nucleotide substitutions")?,
     aa_muts: difference_of_aa_muts(&left.aa_muts, &right.aa_muts)
-      .wrap_err("When calculating union of private aminoacid mutations")?,
+      .wrap_err("When calculating difference of private aminoacid mutations")?,
   })
 }
 
diff --git a/packages_rs/nextclade/src/tree/split_muts2.rs b/packages_rs/nextclade/src/tree/split_muts2.rs
index d1dd46fab..5fe4ab0f0 100644
--- a/packages_rs/nextclade/src/tree/split_muts2.rs
+++ b/packages_rs/nextclade/src/tree/split_muts2.rs
@@ -4,7 +4,6 @@ use crate::analyze::nuc_del::NucDel;
 use crate::analyze::nuc_sub::NucSub;
 use crate::tree::split_muts::SplitMutsResult;
 use itertools::Itertools;
-use regex::internal::Input;
 use std::collections::{BTreeMap, HashSet};
 
 /// Split mutations into 3 groups:
diff --git a/packages_rs/nextclade/src/tree/tree.rs b/packages_rs/nextclade/src/tree/tree.rs
index 9aa4c5de6..0ddedf9ef 100644
--- a/packages_rs/nextclade/src/tree/tree.rs
+++ b/packages_rs/nextclade/src/tree/tree.rs
@@ -2,6 +2,7 @@ use crate::alphabet::aa::Aa;
 use crate::alphabet::nuc::Nuc;
 use crate::analyze::find_private_nuc_mutations::BranchMutations;
 use crate::coord::position::{AaRefPosition, NucRefGlobalPosition};
+use crate::coord::range::NucRefGlobalRange;
 use crate::graph::edge::{Edge, GraphEdge};
 use crate::graph::graph::Graph;
 use crate::graph::node::{GraphNode, Node};
@@ -54,10 +55,14 @@ pub type AuspiceGraphEdge = Edge<AuspiceGraphNodePayload>;
 pub struct GraphTempData {
   pub max_divergence: f64,
   pub divergence_units: DivergenceUnits,
+  pub other: serde_json::Value,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AuspiceGraphMeta {
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub auspice_tree_version: Option<String>,
+
   pub meta: AuspiceTreeMeta,
 
   #[serde(skip)]
@@ -302,24 +307,50 @@ impl AuspiceTreeNode {
   }
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Debug)]
+#[derive(Clone, Serialize, Deserialize, Eq, PartialEq, schemars::JsonSchema, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct CladeNodeAttrKeyDesc {
   pub name: String,
   pub display_name: String,
-  pub description: String,
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub description: Option<String>,
   #[serde(default)]
   pub hide_in_web: bool,
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Validate, Debug)]
+#[derive(Clone, Default, Serialize, Deserialize, Eq, PartialEq, schemars::JsonSchema, Validate, Debug)]
 pub struct AuspiceMetaExtensionsNextclade {
-  pub clade_node_attrs: Option<Vec<CladeNodeAttrKeyDesc>>,
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub clade_node_attrs: Vec<CladeNodeAttrKeyDesc>,
+
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
+  pub placement_mask_ranges: Vec<NucRefGlobalRange>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Validate, Debug)]
+impl AuspiceMetaExtensionsNextclade {
+  pub fn is_empty(&self) -> bool {
+    self == &Self::default()
+  }
+}
+
+#[derive(Clone, Default, Serialize, Deserialize, Eq, PartialEq, schemars::JsonSchema, Validate, Debug)]
 pub struct AuspiceMetaExtensions {
-  pub nextclade: Option<AuspiceMetaExtensionsNextclade>,
+  #[serde(default, skip_serializing_if = "AuspiceMetaExtensionsNextclade::is_empty")]
+  pub nextclade: AuspiceMetaExtensionsNextclade,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
+}
+
+impl AuspiceMetaExtensions {
+  pub fn is_empty(&self) -> bool {
+    self == &Self::default()
+  }
 }
 
 #[derive(Clone, Debug, Default, Serialize, Deserialize, schemars::JsonSchema, Validate)]
@@ -334,9 +365,12 @@ pub struct AuspiceColoring {
   #[serde(skip_serializing_if = "Vec::is_empty")]
   #[serde(default)]
   pub scale: Vec<[String; 2]>,
+
+  #[serde(flatten)]
+  pub other: serde_json::Value,
 }
 
-#[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Validate, Debug)]
+#[derive(Clone, Default, Serialize, Deserialize, Eq, PartialEq, schemars::JsonSchema, Validate, Debug)]
 pub struct AuspiceDisplayDefaults {
   #[serde(skip_serializing_if = "Option::is_none")]
   pub branch_label: Option<String>,
@@ -351,23 +385,27 @@ pub struct AuspiceDisplayDefaults {
   pub other: serde_json::Value,
 }
 
+impl AuspiceDisplayDefaults {
+  pub fn is_empty(&self) -> bool {
+    self == &Self::default()
+  }
+}
+
 #[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Validate, Debug)]
 pub struct AuspiceTreeMeta {
-  #[serde(skip_serializing_if = "Option::is_none")]
-  pub extensions: Option<AuspiceMetaExtensions>,
+  #[serde(default, skip_serializing_if = "AuspiceMetaExtensions::is_empty")]
+  pub extensions: AuspiceMetaExtensions,
 
-  #[serde(skip_serializing_if = "Vec::<AuspiceColoring>::is_empty")]
-  #[serde(default)]
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub colorings: Vec<AuspiceColoring>,
 
-  #[serde(skip_serializing_if = "Vec::<String>::is_empty")]
-  #[serde(default)]
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub panels: Vec<String>,
 
-  #[serde(skip_serializing_if = "Vec::<String>::is_empty")]
-  #[serde(default)]
+  #[serde(default, skip_serializing_if = "Vec::is_empty")]
   pub filters: Vec<String>,
 
+  #[serde(default, skip_serializing_if = "AuspiceDisplayDefaults::is_empty")]
   pub display_defaults: AuspiceDisplayDefaults,
 
   #[serde(skip_serializing_if = "Option::is_none")]
@@ -378,18 +416,19 @@ pub struct AuspiceTreeMeta {
 }
 
 impl AuspiceTreeMeta {
-  #[rustfmt::skip]
-  pub fn clade_node_attr_descs_maybe(&self) -> Option<&[CladeNodeAttrKeyDesc]> {
-    self
-      .extensions.as_ref()?
-      .nextclade.as_ref()?
-      .clade_node_attrs.as_deref()
+  const fn extensions_nextclade(&self) -> &AuspiceMetaExtensionsNextclade {
+    &self.extensions.nextclade
+  }
+
+  /// Extract placement mask ranges (`placement_mask_ranges` of the Nextclade meta extensions)
+  pub fn placement_mask_ranges(&self) -> &[NucRefGlobalRange] {
+    self.extensions_nextclade().placement_mask_ranges.as_slice()
   }
 
-  /// Extracts a list of descriptions of clade-like node attributes.
+  /// Extract a list of descriptions of clade-like node attributes.
   /// These tell what additional entries to expect in node attributes (`node_attr`) of nodes.
   pub fn clade_node_attr_descs(&self) -> &[CladeNodeAttrKeyDesc] {
-    self.clade_node_attr_descs_maybe().unwrap_or(&[])
+    self.extensions_nextclade().clade_node_attrs.as_slice()
   }
 }
 
@@ -427,6 +466,9 @@ impl DivergenceUnits {
 
 #[derive(Clone, Serialize, Deserialize, schemars::JsonSchema, Validate, Debug)]
 pub struct AuspiceTree {
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub version: Option<String>,
+
   pub meta: AuspiceTreeMeta,
 
   pub tree: AuspiceTreeNode,
@@ -439,20 +481,16 @@ pub type AuspiceTreeNodeIter<'a> = Iter<'a, AuspiceTreeNode>;
 
 pub type AuspiceTreeNodeIterFn<'a> = fn(&'a AuspiceTreeNode) -> AuspiceTreeNodeIter<'_>;
 
-impl FromStr for AuspiceTree {
-  type Err = Report;
-
-  fn from_str(s: &str) -> Result<Self, Self::Err> {
-    json_parse(s).wrap_err("When parsing Auspice Tree JSON contents")
-  }
-}
-
 impl AuspiceTree {
   pub fn from_path(filepath: impl AsRef<Path>) -> Result<Self, Report> {
     let filepath = filepath.as_ref();
     let data =
       read_file_to_string(filepath).wrap_err_with(|| format!("When reading Auspice Tree JSON file {filepath:#?}"))?;
-    Self::from_str(&data).wrap_err_with(|| format!("When parsing Auspice Tree JSON file {filepath:#?}"))
+    Self::from_str(data).wrap_err_with(|| format!("When parsing Auspice Tree JSON file {filepath:#?}"))
+  }
+
+  pub fn from_str(s: impl AsRef<str>) -> Result<Self, Report> {
+    json_parse(s).wrap_err("When parsing Auspice Tree JSON contents")
   }
 
   pub fn to_string_pretty(&self) -> Result<String, Report> {
diff --git a/packages_rs/nextclade/src/tree/tree_builder.rs b/packages_rs/nextclade/src/tree/tree_builder.rs
index 6880458a6..4b638ad04 100644
--- a/packages_rs/nextclade/src/tree/tree_builder.rs
+++ b/packages_rs/nextclade/src/tree/tree_builder.rs
@@ -1,11 +1,11 @@
 use crate::analyze::aa_del::AaDel;
 use crate::analyze::aa_sub::AaSub;
-use crate::analyze::divergence::{calculate_branch_length, count_nuc_muts};
+use crate::analyze::divergence::{calculate_branch_length, score_nuc_muts};
 use crate::analyze::find_private_nuc_mutations::BranchMutations;
 use crate::analyze::nuc_del::NucDel;
 use crate::analyze::nuc_sub::NucSub;
-use crate::graph::node::GraphNodeKey;
-use crate::make_internal_report;
+use crate::coord::range::NucRefGlobalRange;
+use crate::graph::node::{GraphNodeKey, Node};
 use crate::tree::params::TreeBuilderParams;
 use crate::tree::split_muts::{difference_of_muts, split_muts, union_of_muts, SplitMutsResult};
 use crate::tree::tree::{AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels};
@@ -15,7 +15,6 @@ use crate::types::outputs::NextcladeOutputs;
 use crate::utils::collections::concat_to_vec;
 use eyre::{Report, WrapErr};
 use itertools::Itertools;
-use regex::internal::Input;
 use std::collections::BTreeMap;
 
 pub fn graph_attach_new_nodes_in_place(
@@ -87,7 +86,7 @@ pub fn graph_attach_new_node_in_place(
   } else {
     // for the attachment on the reference tree ('result') fine tune the position
     // on the updated graph to minimize the number of private mutations
-    finetune_nearest_node(graph, result.nearest_node_id, &mutations_seq)?
+    finetune_nearest_node(graph, result.nearest_node_id, &mutations_seq, params)?
   };
 
   // add the new node at the fine tuned position while accounting for shared mutations
@@ -97,110 +96,143 @@ pub fn graph_attach_new_node_in_place(
   Ok(())
 }
 
+/// Moves the new sequence, defined by its set of private mutations,
+/// along the tree starting at the `nearest_node`. As the new sequence is moved, the
+/// private mutations are updated. This is repeated until the number of private mutations (nuc)
+/// cannot be reduced further by moving the node. At the end of the loop, the nearest node
+/// is either the closest possible point, or this closest point is along the branch leading
+/// to the nearest_node.
 pub fn finetune_nearest_node(
   graph: &AuspiceGraph,
   nearest_node_key: GraphNodeKey,
   seq_private_mutations: &BranchMutations,
+  params: &TreeBuilderParams,
 ) -> Result<(GraphNodeKey, BranchMutations), Report> {
-  let mut current_best_node = graph.get_node(nearest_node_key)?;
+  let masked_ranges = graph.data.meta.placement_mask_ranges();
+  let mut best_node = graph.get_node(nearest_node_key)?;
   let mut private_mutations = seq_private_mutations.clone();
 
   loop {
-    let mut best_node = current_best_node;
-    let (mut best_split_result, mut n_shared_muts) = if current_best_node.is_root() {
-      // don't include node if node is root as we don't attach nodes above the root
-      let best_split_result = SplitMutsResult {
-        left: private_mutations.clone(),
-        right: BranchMutations::default(),
-        shared: BranchMutations::default(),
-      };
-      (best_split_result, 0)
-    } else {
-      let best_split_result = split_muts(
-        &current_best_node.payload().tmp.private_mutations.invert(),
-        &private_mutations,
-      )
-      .wrap_err_with(|| {
+    // Check how many mutations are shared with the branch leading to the current `best_node` or any of its children
+    let (candidate_node, candidate_split, shared_muts_score) =
+      find_shared_muts(graph, best_node, &private_mutations, masked_ranges, params).wrap_err_with(|| {
         format!(
-          "When splitting mutations between query sequence and the nearest node '{}'",
-          current_best_node.payload().name
+          "When calculating shared mutations against the current best node '{}'",
+          best_node.payload().name
         )
       })?;
-      let n_shared_muts = count_nuc_muts(&best_split_result.shared.nuc_muts);
-      (best_split_result, n_shared_muts)
-    };
 
-    for child in graph.iter_children_of(current_best_node) {
-      let tmp_split_result =
-        split_muts(&child.payload().tmp.private_mutations, &private_mutations).wrap_err_with(|| {
-          format!(
-            "When splitting mutations between query sequence and the child node '{}'",
-            child.payload().name
-          )
-        })?;
-      let tmp_n_shared_muts = count_nuc_muts(&tmp_split_result.shared.nuc_muts);
-      if tmp_n_shared_muts > n_shared_muts {
-        n_shared_muts = tmp_n_shared_muts;
-        best_split_result = tmp_split_result;
-        best_node = child;
-      }
+    // Check if the new candidate node is better than the current best
+    let left_muts_score = score_nuc_muts(&candidate_split.left.nuc_muts, masked_ranges, params);
+    match find_better_node_maybe(graph, best_node, candidate_node, shared_muts_score, left_muts_score) {
+      None => break,
+      Some(better_node) => best_node = better_node,
     }
 
-    if n_shared_muts > 0 {
-      if best_node.key() == current_best_node.key() && best_split_result.left.nuc_muts.is_empty() {
-        // All mutations from the parent to the node are shared with private mutations. Move up to the parent.
-        // FIXME: what if there's no parent?
-        current_best_node = graph
-          .parent_of_by_key(best_node.key())
-          .ok_or_else(|| make_internal_report!("Parent node is expected, but not found"))?;
-      } else if best_node.key() == current_best_node.key() {
-        // The best node is the current node. Break.
-        break;
-      } else {
-        // The best node is child
-        current_best_node = graph.get_node(best_node.key())?;
-      }
-      //subtract the shared mutations from the private mutations struct
-      private_mutations = difference_of_muts(&private_mutations, &best_split_result.shared).wrap_err_with(|| {
-        format!(
-          "When calculating difference of mutations between query sequence and the candidate child node '{}'",
-          current_best_node.payload().name
-        )
-      })?;
-      // add the inverted remaining mutations on that branch
-      // even if there are no left-over nuc_subs because they are shared, there can be
-      // changes in the amino acid sequences due to mutations in the same codon that still need handling
-      private_mutations = union_of_muts(&private_mutations, &best_split_result.left.invert()).wrap_err_with(|| {
-        format!(
-          "When calculating union of mutations between query sequence and the candidate child node '{}'",
-          graph.get_node(best_node.key()).expect("Node not found").payload().name
-        )
-      })?;
-    } else if current_best_node.is_leaf()
-      && !current_best_node.is_root()
-      && current_best_node.payload().tmp.private_mutations.nuc_muts.is_empty()
-    {
-      // In this case, a leaf identical to its parent in terms of nuc_subs. this happens when we add
-      // auxiliary nodes.
-
-      // Mutation subtraction is still necessary because there might be shared mutations even if there are no `nuc_subs`.
-      // FIXME: This relies on `is_leaf`. In that case, there is only one entry in `shared_muts_neighbors`
-      // and the `max_shared_muts` is automatically the `current_best_node.key()`. Less error prone would be
-      // to fetch the shared muts corresponding to current_best_node.key()
-      private_mutations = difference_of_muts(&private_mutations, &best_split_result.shared).wrap_err_with(|| {
+    // Update query mutations to adjust for the new position of the placed node
+    private_mutations = update_private_mutations(&private_mutations, &candidate_split).wrap_err_with(|| {
+      format!(
+        "When updating private mutations against the current best node '{}'",
+        best_node.payload().name
+      )
+    })?;
+  }
+
+  Ok((best_node.key(), private_mutations))
+}
+
+/// Check how many mutations are shared with the branch leading to `best_node` or any of its children
+fn find_shared_muts<'g>(
+  graph: &'g AuspiceGraph,
+  best_node: &'g Node<AuspiceGraphNodePayload>,
+  private_mutations: &BranchMutations,
+  masked_ranges: &[NucRefGlobalRange],
+  params: &TreeBuilderParams,
+) -> Result<(&'g Node<AuspiceGraphNodePayload>, SplitMutsResult, f64), Report> {
+  let (mut candidate_split, mut shared_muts_score) = if best_node.is_root() {
+    // Don't include node if node is root as we don't attach nodes above the root
+    let candidate_split = SplitMutsResult {
+      left: BranchMutations::default(),
+      right: private_mutations.clone(),
+      shared: BranchMutations::default(),
+    };
+    (candidate_split, 0.0)
+  } else {
+    let candidate_split = split_muts(&best_node.payload().tmp.private_mutations.invert(), private_mutations)
+      .wrap_err_with(|| {
         format!(
-          "When subtracting mutations from zero-length parent node '{}'",
-          current_best_node.payload().name
+          "When splitting mutations between query sequence and the nearest node '{}'",
+          best_node.payload().name
         )
       })?;
-      current_best_node = graph
-        .parent_of_by_key(best_node.key())
-        .ok_or_else(|| make_internal_report!("Parent node is expected, but not found"))?;
-    } else {
-      break;
+    let shared_muts_score = score_nuc_muts(&candidate_split.shared.nuc_muts, masked_ranges, params);
+    (candidate_split, shared_muts_score)
+  };
+
+  // Check all child nodes for shared mutations
+  let mut candidate_node = best_node;
+  for child in graph.iter_children_of(best_node) {
+    let child_split = split_muts(&child.payload().tmp.private_mutations, private_mutations).wrap_err_with(|| {
+      format!(
+        "When splitting mutations between query sequence and the child node '{}'",
+        child.payload().name
+      )
+    })?;
+    let child_shared_muts_score = score_nuc_muts(&child_split.shared.nuc_muts, masked_ranges, params);
+    if child_shared_muts_score > shared_muts_score {
+      shared_muts_score = child_shared_muts_score;
+      candidate_split = child_split;
+      candidate_node = child;
     }
   }
-  Ok((current_best_node.key(), private_mutations))
+  Ok((candidate_node, candidate_split, shared_muts_score))
+}
+
+/// Find out if the candidate node is better than the current best (with caveats).
+/// Return a better node or `None` (if the current best node is to be preserved).
+fn find_better_node_maybe<'g>(
+  graph: &'g AuspiceGraph,
+  best_node: &'g Node<AuspiceGraphNodePayload>,
+  candidate_node: &'g Node<AuspiceGraphNodePayload>,
+  shared_muts_score: f64,
+  left_muts_score: f64,
+) -> Option<&'g Node<AuspiceGraphNodePayload>> {
+  if candidate_node == best_node {
+    // The best node is the node itself. Move up the tree if all mutations between
+    // the candidate node and its parent are also in the private mutations.
+    // This covers the case where the candidate is a leaf with a zero-length branch,
+    // as `.left.nuc_muts` is empty in that case
+    if left_muts_score == 0.0 {
+      return graph.parent_of(candidate_node);
+    }
+  } else if shared_muts_score > 0.0 {
+    // candidate node is child node, move to child node if there are shared mutations
+    // this should always be the case if the candidate node != best_node
+    return Some(candidate_node);
+  }
+  // no improvement possible. Return None to stay
+  None
+}
+
+/// Update private mutations to match the new best node
+fn update_private_mutations(
+  private_mutations: &BranchMutations,
+  best_split_result: &SplitMutsResult,
+) -> Result<BranchMutations, Report> {
+  // Step 1: subtract shared mutations from private mutations
+  let private_mutations = difference_of_muts(private_mutations, &best_split_result.shared).wrap_err(
+    "When calculating difference of mutations between query sequence and the branch leading to the next attachment point"
+  )?;
+
+  // Step 2: We need to add the inverted remaining mutations on that branch.
+  // Note that this can be necessary even if there are no left-over nuc_subs.
+  // Amino acid mutations can be decoupled from their nucleotide mutations, or
+  // changes in the amino acid sequences due to mutations in the same codon may still need handling.
+  let private_mutations = union_of_muts(&private_mutations, &best_split_result.left.invert()).wrap_err(
+    "When calculating union of mutations between query sequence and the branch leading to the next attachment point.",
+  )?;
+
+  Ok(private_mutations)
 }
 
 pub fn attach_to_internal_node(
@@ -261,6 +293,7 @@ pub fn knit_into_graph(
   ref_seq_len: usize,
   params: &TreeBuilderParams,
 ) -> Result<(), Report> {
+  let masked_ranges = graph.data.meta.placement_mask_ranges().to_owned();
   let divergence_units = graph.data.tmp.divergence_units;
 
   // the target node will be the sister of the new node defined by "private mutations" and the "result"
@@ -302,11 +335,31 @@ pub fn knit_into_graph(
       muts_new_node,
     }
   };
-  // if the node is a leaf or if there are shared mutations, need to split the branch above and insert aux node
+  // if the node is a leaf or if there are non-shared mutations, need to split the branch above and insert aux node
   if target_node.is_leaf() || !muts_target_node.nuc_muts.is_empty() {
     // determine divergence of new internal node by subtracting shared reversions from target_node
-    let divergence_middle_node =
-      target_node_div - calculate_branch_length(&muts_target_node.nuc_muts, divergence_units, ref_seq_len);
+    let divergence_middle_node = if target_node.is_root() {
+      target_node_div
+        - calculate_branch_length(
+          &muts_target_node.nuc_muts,
+          &masked_ranges,
+          divergence_units,
+          ref_seq_len,
+        )
+    } else {
+      let parent_node = graph.parent_of(target_node).unwrap();
+      let parent_node_auspice = parent_node.payload();
+      let parent_node_div = &parent_node_auspice.node_attrs.div.unwrap_or(0.0);
+      target_node_div.min(
+        parent_node_div
+          + calculate_branch_length(
+            &muts_common_branch.nuc_muts,
+            &masked_ranges,
+            divergence_units,
+            ref_seq_len,
+          ),
+      )
+    };
 
     // generate new internal node
     // add private mutations, divergence, name and branch attrs to new internal node
@@ -349,7 +402,8 @@ pub fn knit_into_graph(
       new_internal_node_key,
       &muts_new_node,
       result,
-      divergence_middle_node + calculate_branch_length(&muts_new_node.nuc_muts, divergence_units, ref_seq_len),
+      divergence_middle_node
+        + calculate_branch_length(&muts_new_node.nuc_muts, &masked_ranges, divergence_units, ref_seq_len),
     )?;
   } else {
     //can simply attach node
@@ -358,7 +412,7 @@ pub fn knit_into_graph(
       target_key,
       private_mutations,
       result,
-      target_node_div + calculate_branch_length(&muts_new_node.nuc_muts, divergence_units, ref_seq_len),
+      target_node_div + calculate_branch_length(&muts_new_node.nuc_muts, &masked_ranges, divergence_units, ref_seq_len),
     )?;
   }
   Ok(())
diff --git a/packages_rs/nextclade/src/tree/tree_find_nearest_node.rs b/packages_rs/nextclade/src/tree/tree_find_nearest_node.rs
index 635d85809..4fbe0e3f9 100644
--- a/packages_rs/nextclade/src/tree/tree_find_nearest_node.rs
+++ b/packages_rs/nextclade/src/tree/tree_find_nearest_node.rs
@@ -22,8 +22,9 @@ pub fn graph_find_nearest_nodes(
   qry_nuc_subs: &[NucSub],
   qry_missing: &[NucRange],
   aln_range: &NucRefGlobalRange,
-  masked_ranges: &[NucRefGlobalRange],
 ) -> Result<Vec<TreePlacementInfo>, Report> {
+  let masked_ranges = graph.data.meta.placement_mask_ranges();
+
   // Iterate over tree nodes and calculate distance metric between the sample and each node
   let nodes_by_placement_score = DftPre::new(graph.get_exactly_one_root()?, |node| graph.iter_children_of(node))
     .map(|(_, node)| {
diff --git a/packages_rs/nextclade/src/tree/tree_preprocess.rs b/packages_rs/nextclade/src/tree/tree_preprocess.rs
index e069cbaa4..ef4a1afb6 100644
--- a/packages_rs/nextclade/src/tree/tree_preprocess.rs
+++ b/packages_rs/nextclade/src/tree/tree_preprocess.rs
@@ -201,7 +201,7 @@ fn map_aa_muts_for_one_gene(
 
         if ref_peptide.len() < mutation.pos.as_usize() {
           return make_error!(
-          "When preprocessing reference tree node {}: amino acid mutation {}:{} is outside of the peptide {} (length {}). This is likely an inconsistency between reference tree, reference sequence, and gene map in the Nextclade dataset",
+          "When preprocessing reference tree node {}: amino acid mutation {}:{} is outside of the peptide {} (length {}). This is likely an inconsistency between reference tree, reference sequence, and genome annotation in the Nextclade dataset",
           node.name,
           gene_name,
           mutation.to_string_without_gene(),
@@ -252,6 +252,7 @@ pub fn add_auspice_metadata_in_place(meta: &mut AuspiceTreeMeta) {
       title: "Node type".to_owned(),
       type_: "categorical".to_owned(),
       scale: vec![pair("New", "#ff6961"), pair("Reference", "#999999")],
+      other: serde_json::Value::default(),
     },
     AuspiceColoring {
       key: "QC Status".to_owned(),
@@ -262,12 +263,14 @@ pub fn add_auspice_metadata_in_place(meta: &mut AuspiceTreeMeta) {
         pair("mediocre", "#cab44d"),
         pair("bad", "#CA738E"),
       ],
+      other: serde_json::Value::default(),
     },
     AuspiceColoring {
       key: "Has PCR primer changes".to_owned(),
       title: "Has PCR primer changes".to_owned(),
       type_: "categorical".to_owned(),
       scale: vec![pair("Yes", "#6961ff"), pair("No", "#999999")],
+      other: serde_json::Value::default(),
     },
   ];
 
diff --git a/packages_rs/nextclade/src/types/outputs.rs b/packages_rs/nextclade/src/types/outputs.rs
index 9a0562df0..16b0da96d 100644
--- a/packages_rs/nextclade/src/types/outputs.rs
+++ b/packages_rs/nextclade/src/types/outputs.rs
@@ -1,5 +1,4 @@
-use crate::align::backtrace::AlignmentOutput;
-use crate::align::insertions_strip::{AaIns, Insertion, StripInsertionsResult};
+use crate::align::insertions_strip::{AaIns, Insertion};
 use crate::alphabet::nuc::Nuc;
 use crate::analyze::aa_changes::AaChangesGroup;
 use crate::analyze::aa_del::AaDel;
@@ -11,13 +10,11 @@ use crate::analyze::letter_ranges::{GeneAaRange, NucRange};
 use crate::analyze::nuc_del::NucDelRange;
 use crate::analyze::nuc_sub::NucSub;
 use crate::analyze::pcr_primer_changes::PcrPrimerChange;
-use crate::coord::coord_map_global::CoordMapGlobal;
 use crate::coord::range::{AaRefRange, NucRefGlobalRange};
 use crate::graph::node::GraphNodeKey;
 use crate::io::json::json_parse;
 use crate::qc::qc_run::QcResult;
 use crate::translate::frame_shifts_translate::FrameShift;
-use crate::translate::translate_genes::Translation;
 use eyre::{Report, WrapErr};
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
@@ -29,19 +26,6 @@ pub struct PeptideWarning {
   pub warning: String,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct NextalignOutputs {
-  pub alignment: AlignmentOutput<Nuc>,
-  pub stripped: StripInsertionsResult<Nuc>,
-  pub translation: Translation,
-  pub aa_insertions: Vec<AaIns>,
-  pub warnings: Vec<PeptideWarning>,
-  pub missing_genes: Vec<String>,
-  pub is_reverse_complement: bool,
-  pub coord_map_global: CoordMapGlobal,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct PhenotypeValue {
diff --git a/packages_rs/nextclade/src/utils/boolean.rs b/packages_rs/nextclade/src/utils/boolean.rs
new file mode 100644
index 000000000..be47d7f79
--- /dev/null
+++ b/packages_rs/nextclade/src/utils/boolean.rs
@@ -0,0 +1,11 @@
+/// Useful for `#[serde(default = "bool_true")]`
+#[inline]
+pub const fn bool_true() -> bool {
+  true
+}
+
+/// Useful for `#[serde(default = "bool_false")]`
+#[inline]
+pub const fn bool_false() -> bool {
+  false
+}
diff --git a/packages_rs/nextclade/src/utils/getenv.rs b/packages_rs/nextclade/src/utils/getenv.rs
index a65dea649..6afa4e696 100644
--- a/packages_rs/nextclade/src/utils/getenv.rs
+++ b/packages_rs/nextclade/src/utils/getenv.rs
@@ -1,4 +1,4 @@
-#[macro_export(local_inner_macros)]
+#[macro_export]
 macro_rules! getenv {
   ($arg:tt) => {{
     match core::option_env!($arg) {
diff --git a/packages_rs/nextclade/src/utils/info.rs b/packages_rs/nextclade/src/utils/info.rs
new file mode 100644
index 000000000..c78746808
--- /dev/null
+++ b/packages_rs/nextclade/src/utils/info.rs
@@ -0,0 +1,26 @@
+use lazy_static::lazy_static;
+use semver::Version;
+
+pub fn this_package_name() -> &'static str {
+  lazy_static! {
+    pub static ref PKG_NAME: &'static str = env!("CARGO_PKG_NAME");
+  }
+  &PKG_NAME
+}
+
+pub fn this_package_version() -> &'static Version {
+  lazy_static! {
+    pub static ref VERSION: Version = Version::parse(env!("CARGO_PKG_VERSION")).expect(
+      "Unable to parse env var `CARGO_PKG_VERSION` in semantic version format. \
+        In most cases it comes from `version` field in `Cargo.toml` file."
+    );
+  }
+  &VERSION
+}
+
+pub fn this_package_version_str() -> &'static str {
+  lazy_static! {
+    pub static ref VERSION_STR: String = this_package_version().to_string();
+  }
+  &VERSION_STR
+}
diff --git a/packages_rs/nextclade/src/utils/mod.rs b/packages_rs/nextclade/src/utils/mod.rs
index 754e5e19c..4a269f3b1 100644
--- a/packages_rs/nextclade/src/utils/mod.rs
+++ b/packages_rs/nextclade/src/utils/mod.rs
@@ -1,8 +1,10 @@
+pub mod boolean;
 pub mod collections;
 pub mod datetime;
 pub mod error;
 pub mod getenv;
 pub mod global_init;
+pub mod info;
 pub mod num;
 pub mod option;
 pub mod string;
diff --git a/packages_rs/nextclade/src/utils/option.rs b/packages_rs/nextclade/src/utils/option.rs
index cb0ad0f5a..5ff800ad2 100644
--- a/packages_rs/nextclade/src/utils/option.rs
+++ b/packages_rs/nextclade/src/utils/option.rs
@@ -36,3 +36,26 @@ impl<'o, T: 'o> OptionMapRefFallible<'o, T> for Option<T> {
     (*self).as_ref().map(f).transpose()
   }
 }
+
+pub trait OptionMapMutFallible<'o, T: 'o> {
+  /// Mutably borrows the internal value of an `Option`, maps it using the provided closure
+  /// and transposes `Option` of `Result` to `Result` of `Option`.
+  ///
+  /// Convenient to use with fallible mapping functions (which return a `Result`)
+  ///
+  /// Inspired by
+  /// https://github.com/ammongit/rust-ref-map/blob/4b1251c6d2fd192d89a114395b36aeeab5c5433c/src/option.rs
+  fn map_mut_fallible<U, F, E>(&'o mut self, f: F) -> Result<Option<U>, E>
+  where
+    F: FnOnce(&'o mut T) -> Result<U, E>;
+}
+
+impl<'o, T: 'o> OptionMapMutFallible<'o, T> for Option<T> {
+  #[inline]
+  fn map_mut_fallible<U, F, E>(&'o mut self, f: F) -> Result<Option<U>, E>
+  where
+    F: FnOnce(&'o mut T) -> Result<U, E>,
+  {
+    (*self).as_mut().map(f).transpose()
+  }
+}
diff --git a/packages_rs/nextclade/src/utils/string.rs b/packages_rs/nextclade/src/utils/string.rs
index f5db5b1d1..69c9697ca 100644
--- a/packages_rs/nextclade/src/utils/string.rs
+++ b/packages_rs/nextclade/src/utils/string.rs
@@ -1,3 +1,6 @@
+use itertools::Itertools;
+use strsim::sorensen_dice;
+
 /// Return copy of a string surrounded with quotation marks
 #[must_use]
 pub fn surround_with_quotes(s: impl AsRef<str>) -> String {
@@ -29,3 +32,15 @@ macro_rules! o {
     ToOwned::to_owned($x)
   };
 }
+
+pub fn find_similar_strings<T: AsRef<str> + Copy, U: AsRef<str>>(
+  haystack: impl Iterator<Item = T>,
+  needle: U,
+) -> impl Iterator<Item = T> {
+  let scores = haystack
+    .map(|candidate| (candidate, sorensen_dice(candidate.as_ref(), needle.as_ref())))
+    .filter(|(_, score)| *score > 0.0)
+    .sorted_by_key(|(_, score)| -(score * 1000.0) as isize)
+    .collect_vec();
+  scores.into_iter().map(|(candidate, _)| candidate)
+}
diff --git a/scripts/build_on_vercel.sh b/scripts/build_on_vercel.sh
index 11a877f2e..e28133150 100755
--- a/scripts/build_on_vercel.sh
+++ b/scripts/build_on_vercel.sh
@@ -115,6 +115,8 @@ sed -i'' "s|PROD_ENABLE_TYPE_CHECKS=1|PROD_ENABLE_TYPE_CHECKS=0|g" .env
 sed -i'' "s|PROD_ENABLE_ESLINT=1|PROD_ENABLE_ESLINT=0|g" .env
 sed -i'' "s|PROD_ENABLE_STYLELINT=1|PROD_ENABLE_STYLELINT=0|g" .env
 
+sed -i'' "s|DATA_TRY_GITHUB_BRANCH=0|DATA_TRY_GITHUB_BRANCH=1|g" .env
+
 cd packages_rs/nextclade-web
 
 yarn install --frozen-lockfile
diff --git a/tests/run-smoke-tests b/tests/run-smoke-tests
index a3e10e127..52340772a 100755
--- a/tests/run-smoke-tests
+++ b/tests/run-smoke-tests
@@ -1,119 +1,224 @@
 #!/usr/bin/env bash
-# shellcheck disable=SC2155
-set -euo pipefail
+set -euo pipefail -o errtrace
 trap "exit 0" INT
 
 # Runs smoke tests - basic use-case checks with default data
 # https://en.wikipedia.org/wiki/Smoke_testing_(software)
 #
 # Dependencies:
-#   sudo apt-get install -y bash curl parallel
-#   mkdir -p "${HOME}/bin"
-#   export PATH="${HOME}/bin:${PATH}"
-#   curl -fsSL "https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64" -o ${HOME}/bin/jq && chmod +x ${HOME}/bin/jq
-
-export NEXTCLADE_BIN="${1:? "Usage: ${0} path_to_nextclade"}"
-export NEXTCLADE_BIN
-
-THIS_DIR=$(
-  cd "$(dirname "${BASH_SOURCE[0]}")"
-  pwd
-)
+#   sudo apt-get install -y bash parallel
+#
+# Usage (NOTE: you must build, and after any change re-build, the Nextclade executable yourself; this script does not do that):
+#
+# 1. Download datasets from the default dataset server and run tests with a given nextclade executable:
+#
+#     ./tests/run-smoke-tests 'target/release/nextclade'
+#
+# 2. Run tests with a given nextclade executable and a directory containing datasets. Dataset directories are
+#    identified as directories containing a `pathogen.json` file.
+#
+#     ./tests/run-smoke-tests 'target/release/nextclade' '.../nextclade_data/data_output'
+#
+# The downloaded datasets will be in $DATASETS_DIR and Nextclade output files will be in $RESULTS_DIR (see below)
+
+export NEXTCLADE_BIN="${1:? "Usage: ${0} <path_to_nextclade> [path_to_dataset_collection_dir]"}"  # required argument
+export INPUT_DATASETS_DIR="${2:-}"  # optional argument; when empty, datasets are downloaded instead
+
+THIS_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")";  pwd)  # absolute path of the directory containing this script
 export THIS_DIR
 
-export DATASET_DIR="$THIS_DIR/../tmp/smoke-tests/dataset"
-export OUT_DIR="$THIS_DIR/../tmp/smoke-tests/result"
+export DATASETS_DIR="${THIS_DIR}/../tmp/smoke-tests/dataset"  # where downloaded datasets are stored
+export RESULTS_DIR="${THIS_DIR}/../tmp/smoke-tests/result"  # where Nextclade output files are written
 
-export dataset_names_and_refs=$(${NEXTCLADE_BIN} dataset list --json | jq -r '.[] | select(.attributes.tag.isDefault==true) |  .attributes.name.value + ";" + .attributes.reference.value' | sort | uniq)
-export dataset_names_without_refs=$(${NEXTCLADE_BIN} dataset list --json | jq -r '.[] | select(.attributes.tag.isDefault==true) |  .attributes.name.value + ";"' | sort | uniq)
-export all_datasets="${dataset_names_and_refs} ${dataset_names_without_refs}"
-export num_datasets=$(echo "${all_datasets}" | wc -l)
-export JOBS="${JOBS:=${num_datasets}}"
 
-function run_one_test() {
-  name_and_ref=$1
+function run_with_dataset_dir() {  # Smoke run using the dataset directory itself as `--input-dataset`
+  set -o errtrace  # NOTE(review): errtrace only makes ERR traps inherited; it does not enable errexit - confirm intent
 
-  # shellcheck disable=SC2206
-  arr=(${name_and_ref//;/ })
-  name="${arr[0]}"
-  reference="${arr[1]}"
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory holding the dataset files
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_dataset"
 
-  # This dataset is crashing, due to a defect in the dataset
-  if [ "$name" == "sars-cov-2-21L" ]; then
-    return
-  fi
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-dataset="${dataset_dir}" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
+}
+export -f run_with_dataset_dir  # exported so that shells spawned by GNU parallel can call it
 
-  # Temporarily disable default reference
-  if [ -z "$reference" ]; then
-    return
-  fi
 
-  echo "Running '${NEXTCLADE_BIN}' for '${name}' with reference '${reference:-default}'"
+function run_with_dataset_zip() {  # Smoke run using `dataset.zip` from the dataset directory as `--input-dataset`
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory expected to contain dataset.zip
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_dataset_zip"
 
-  ${NEXTCLADE_BIN} dataset get --name="${name}" ${reference:+--reference "${reference}"} --output-dir="$DATASET_DIR/${name}_${reference:-default}"
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-dataset="${dataset_dir}/dataset.zip" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
+}
+export -f run_with_dataset_zip  # exported so that shells spawned by GNU parallel can call it
+
+
+function run_with_ref_only() {  # Smoke run using only `reference.fasta` from the dataset directory
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory expected to contain reference.fasta
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_ref_only"
 
-  ${NEXTCLADE_BIN} run --in-order --include-reference \
-    --input-dataset="$DATASET_DIR/${name}_${reference:-default}" \
-    --output-translations="$OUT_DIR/${name}_${reference:-default}/translations/gene_{gene}.translation.fasta" \
-    --output-fasta="$OUT_DIR/${name}_${reference:-default}/aligned.fasta" \
-    --output-tsv="$OUT_DIR/${name}_${reference:-default}/nextclade.tsv" \
-    --output-tree="$OUT_DIR/${name}_${reference:-default}/tree.json" \
-    --output-graph="$OUT_DIR/${name}_${reference:-default}/graph.json" \
-    "$DATASET_DIR/${name}_${reference:-default}/sequences.fasta"
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-ref="${dataset_dir}/reference.fasta" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
 }
-export -f run_one_test
+export -f run_with_ref_only  # exported so that shells spawned by GNU parallel can call it
 
-function run_many_tests() {
-  parallel --keep-order --jobs=+0 run_one_test ::: "${all_datasets}"
+
+function run_with_ref_and_annotation() {  # Smoke run using the reference sequence plus the genome annotation
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory expected to contain reference.fasta
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_ref_and_annotation"
+
+  if [ ! -f "${dataset_dir}/genome_annotation.gff3" ]; then return; fi  # no annotation file: variant is skipped
+
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-ref="${dataset_dir}/reference.fasta" \
+    --input-annotation="${dataset_dir}/genome_annotation.gff3" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
 }
-export -f run_many_tests
+export -f run_with_ref_and_annotation  # exported so that shells spawned by GNU parallel can call it
+
 
+function run_with_ref_and_tree() {  # Smoke run using the reference sequence plus the reference tree
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory expected to contain reference.fasta
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_ref_and_tree"
 
+  if [ ! -f "${dataset_dir}/tree.json" ]; then return; fi  # no tree file: variant is skipped
+
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-ref="${dataset_dir}/reference.fasta" \
+    --input-tree="${dataset_dir}/tree.json" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
+}
+export -f run_with_ref_and_tree  # exported so that shells spawned by GNU parallel can call it
 
 
-# Additional experimental datasets. This should probably be removed after release of this branch.
-export DATASET_DIR_EXPERIMENTAL="$THIS_DIR/../test_datasets"
-export all_experimental_datasets="$(ls "${DATASET_DIR_EXPERIMENTAL}")"
+function run_with_ref_and_annotation_and_tree() {  # Smoke run using reference, genome annotation and reference tree
+  name="${1}"  # dataset name; becomes a subdirectory of RESULTS_DIR
+  dataset_dir="${2}"  # directory expected to contain reference.fasta
+  sequences="${3}"  # FASTA file given to Nextclade as the query input
+  out_dir="${RESULTS_DIR}/${name}/with_ref_and_annotation_and_tree"
 
-function run_one_experimental_test() {
-  name=$1
-  reference="default"
+  if [ ! -f "${dataset_dir}/genome_annotation.gff3" ]; then return; fi  # no annotation file: variant is skipped
+  if [ ! -f "${dataset_dir}/tree.json" ]; then return; fi  # no tree file: variant is skipped
 
-  # This dataset is crashing, probably due to defective or unsupported features
-  if [ "$name" == "vic_na" ]; then
+  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
+    --input-ref="${dataset_dir}/reference.fasta" \
+    --input-annotation="${dataset_dir}/genome_annotation.gff3" \
+    --input-tree="${dataset_dir}/tree.json" \
+    --output-translations="${out_dir}/translations/gene_{gene}.translation.fasta" \
+    --output-all="${out_dir}" \
+    "${sequences}"
+}
+export -f run_with_ref_and_annotation_and_tree  # exported so that shells spawned by GNU parallel can call it
+
+
+function run_single_dataset() {  # Runs every smoke-test variant for one dataset directory ($1) under INPUT_DATASETS_DIR
+  set -euo pipefail  # runs in a fresh shell under GNU parallel: without errexit a failed variant would be masked
+  dataset_dir=$1
+  name="$(realpath --relative-to="$INPUT_DATASETS_DIR" "$dataset_dir")"
+  # This dataset is crashing, due to a defect in the dataset's genome annotation
+  if [[ "$name" =~ nextstrain/vic_na ]]; then  # `=~` is an unanchored regex match, so no trailing wildcard is needed
     return
   fi
-  # this isn't actually a dataset
-  if [ "$name" == "seed_matching" ]; then
+
+  # This dataset is crashing, due to a defect in the dataset
+  if [[ "$name" =~ nextstrain/sars-cov-2-21L ]]; then
     return
   fi
 
-  sequences="${DATASET_DIR_EXPERIMENTAL}/${name}/files/sequences.fasta"
+  # This dataset is crashing, due to a defect in the dataset's genome annotation
+  if [[ "$name" =~ nextstrain/flu/h3n2/ha ]]; then
+    return
+  fi
+
+  sequences="$dataset_dir/sequences.fasta"
   msg_no_sequences=""
   if [ ! -f "${sequences}" ]; then
-    sequences="${DATASET_DIR_EXPERIMENTAL}/${name}/files/reference.fasta"
+    sequences="$dataset_dir/reference.fasta"
     msg_no_sequences=" (Note: this dataset contains no example sequences. Using reference sequence as query.)"
   fi
 
-  echo "Running '${NEXTCLADE_BIN}' for '${name}' with reference '${reference:-default}'${msg_no_sequences}"
+  echo "Running '${NEXTCLADE_BIN}' for '${name}'${msg_no_sequences}"
 
-  ${NEXTCLADE_BIN} run --quiet --in-order --include-reference \
-    --input-dataset="${DATASET_DIR_EXPERIMENTAL}/${name}/files" \
-    --output-translations="$OUT_DIR/${name}_${reference:-default}/translations/gene_{gene}.translation.fasta" \
-    --output-fasta="$OUT_DIR/${name}_${reference:-default}/aligned.fasta" \
-    --output-tsv="$OUT_DIR/${name}_${reference:-default}/nextclade.tsv" \
-    --output-tree="$OUT_DIR/${name}_${reference:-default}/tree.json" \
-    --output-graph="$OUT_DIR/${name}_${reference:-default}/graph.json" \
-    "${sequences}"
+  run_with_dataset_dir                    "${name}" "${dataset_dir}" "${sequences}"
+  run_with_dataset_zip                    "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_only                       "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_annotation             "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_tree                   "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_annotation_and_tree    "${name}" "${dataset_dir}" "${sequences}"
 }
-export -f run_one_experimental_test
+export -f run_single_dataset
 
-function run_many_experimental_tests() {
-  parallel --keep-order --jobs=+0 run_one_experimental_test ::: "${all_experimental_datasets}"
-}
-export -f run_many_experimental_tests
 
+function download_and_run_single_dataset() {
+  # Downloads dataset $1 (as a directory and as a zip archive) and runs every smoke-test variant on it.
+  set -euo pipefail -o errtrace  # fresh shell under GNU parallel: without errexit a failed step would be masked
+  name=$1
+  dataset_dir="${DATASETS_DIR}/${name}"
+
+  # This dataset is crashing, due to a defect in the dataset's genome annotation
+  if [[ "$name" =~ nextstrain/vic_na ]]; then  # `=~` is an unanchored regex match, so no trailing wildcard is needed
+    return
+  fi
+
+  # This dataset is crashing, due to a defect in the dataset
+  if [[ "$name" =~ nextstrain/sars-cov-2-21L ]]; then
+    return
+  fi
+
+  # This dataset is crashing, due to a defect in the dataset's genome annotation
+  if [[ "$name" =~ nextstrain/flu/h3n2/ha ]]; then
+    return
+  fi
+
+  ${NEXTCLADE_BIN} dataset get --name="${name}" --output-dir="$dataset_dir"
 
+  ${NEXTCLADE_BIN} dataset get --name="${name}" --output-zip="$dataset_dir/dataset.zip"
+
+  sequences="$dataset_dir/sequences.fasta"
+  msg_no_sequences=""
+  if [ ! -f "${sequences}" ]; then
+    sequences="$dataset_dir/reference.fasta"
+    msg_no_sequences=" (Note: this dataset contains no example sequences. Using reference sequence as query.)"
+  fi
+
+  echo "Running '${NEXTCLADE_BIN}' for '${name}'${msg_no_sequences}"
+
+  run_with_dataset_dir                    "${name}" "${dataset_dir}" "${sequences}"
+  run_with_dataset_zip                    "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_only                       "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_annotation             "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_tree                   "${name}" "${dataset_dir}" "${sequences}"
+  run_with_ref_and_annotation_and_tree    "${name}" "${dataset_dir}" "${sequences}"
+}
+export -f download_and_run_single_dataset
 
-parallel --ungroup --keep-order --jobs=2 ::: run_many_tests run_many_experimental_tests
 
+if [ -z "${INPUT_DATASETS_DIR}" ]; then  # no dataset directory given: download each dataset named by `dataset list`
+  all_datasets=$(${NEXTCLADE_BIN} dataset list --include-experimental --include-community --include-deprecated --only-names)
+  parallel --keep-order --jobs=+0 download_and_run_single_dataset ::: "${all_datasets}"
+else  # dataset directory given: test every directory under it that contains a `pathogen.json` file
+  find "${INPUT_DATASETS_DIR}" -iname "pathogen.json" -exec dirname '{}' \; |  parallel --keep-order --jobs=+0 run_single_dataset
+fi
diff --git a/tests/test-linux-distros b/tests/test-linux-distros
index d48433a0c..cb2008a85 100755
--- a/tests/test-linux-distros
+++ b/tests/test-linux-distros
@@ -14,7 +14,8 @@ function abspath() {
 }
 
 export NEXTCLADE_BIN="${1:? "Usage: ${0} <path_to_nextclade>"}"
-export VOLUME="$(abspath "${NEXTCLADE_BIN}"):/nextclade"
+VOLUME="$(abspath "${NEXTCLADE_BIN}"):/nextclade"
+export VOLUME
 export RUN_COMMAND="/nextclade dataset list >/dev/null"
 
 distros=(